diff options
Diffstat (limited to 'tools')
304 files changed, 8842 insertions, 3086 deletions
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index 6e060907c163..d25534940bde 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -58,8 +58,7 @@ #define ORC_TYPE_CALL 0 #define ORC_TYPE_REGS 1 #define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 #ifndef __ASSEMBLY__ /* diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h index 196fdd02b8b1..30d7d04d72d6 100644 --- a/tools/arch/x86/include/uapi/asm/unistd.h +++ b/tools/arch/x86/include/uapi/asm/unistd.h @@ -3,7 +3,7 @@ #define _UAPI_ASM_X86_UNISTD_H /* x32 syscall flag bit */ -#define __X32_SYSCALL_BIT 0x40000000UL +#define __X32_SYSCALL_BIT 0x40000000 #ifndef __KERNEL__ # ifdef __i386__ diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 16b9a420e6fd..0efaf45f7367 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -314,6 +314,7 @@ int apply_xbc(const char *path, const char *xbc_path) ret = delete_xbc(path); if (ret < 0) { pr_err("Failed to delete previous boot config: %d\n", ret); + free(data); return ret; } @@ -321,24 +322,27 @@ int apply_xbc(const char *path, const char *xbc_path) fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { pr_err("Failed to open %s: %d\n", path, fd); + free(data); return fd; } /* TODO: Ensure the @path is initramfs/initrd image */ ret = write(fd, data, size + 8); if (ret < 0) { pr_err("Failed to apply a boot config: %d\n", ret); - return ret; + goto out; } /* Write a magic word of the bootconfig */ ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); if (ret < 0) { pr_err("Failed to apply a boot config magic: %d\n", ret); - return ret; + goto out; } + ret = 0; +out: close(fd); free(data); - return 0; + return ret; } int usage(void) diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index 0fe0d584c57e..e17738479edc 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -479,6 +479,7 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { + struct bpf_object_load_attr load_attr = {}; const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); @@ -499,7 +500,12 @@ static int do_register(int argc, char **argv) set_max_rlimit(); - if (bpf_object__load(obj)) { + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + if (bpf_object__load_xattr(&load_attr)) { bpf_object__close(obj); return -1; } diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 39edd68afa8e..8a6f82e56a24 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -8,7 +8,7 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) BPFOBJ := $(OUTPUT)/libbpf.a BPF_INCLUDE := $(OUTPUT) -INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib) +INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) CFLAGS := -g -Wall # Try to detect best kernel BTF source diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 3e0c019ef297..3abd4316cd4f 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \ llvm \ llvm-version \ clang \ - libbpf + libbpf \ + libpfm4 FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 92012381393a..84f845b9627d 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -69,7 +69,8 @@ FILES= \ test-libaio.bin \ test-libzstd.bin \ test-clang-bpf-global-var.bin \ - test-file-handle.bin + test-file-handle.bin \ + test-libpfm4.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -331,6 +332,9 @@ $(OUTPUT)test-clang-bpf-global-var.bin: $(OUTPUT)test-file-handle.bin: $(BUILD) +$(OUTPUT)test-libpfm4.bin: + $(BUILD) -lpfm + ############################### clean: diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index 2b0e02c38870..1547bc2c0950 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0)) +#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 0.11.0 is required" +#error "OpenCSD >= 0.14.0 is required" #endif int main(void) diff --git a/tools/build/feature/test-libpfm4.c b/tools/build/feature/test-libpfm4.c new file mode 100644 index 000000000000..af49b259459e --- /dev/null +++ b/tools/build/feature/test-libpfm4.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/types.h> +#include <perfmon/pfmlib.h> + +int main(void) +{ + pfm_initialize(); + return 0; +} diff --git a/tools/build/feature/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c index 1e38d1930a97..3bc6b0768a53 100644 --- a/tools/build/feature/test-sync-compare-and-swap.c +++ b/tools/build/feature/test-sync-compare-and-swap.c @@ -7,7 +7,7 @@ int main(int argc, char *argv[]) { uint64_t old, new = argc; - argv = argv; + (void)argv; do { old = __sync_val_compare_and_swap(&x, 0, 0); } while (!__sync_bool_compare_and_swap(&x, old, new)); diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h index e03b1ea23e0e..30dd21f976c3 100644 --- a/tools/include/linux/rbtree.h +++ b/tools/include/linux/rbtree.h @@ -11,7 +11,7 @@ I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... - See Documentation/rbtree.txt for documentation and samples. + See Documentation/core-api/rbtree.rst for documentation and samples. */ #ifndef __TOOLS_LINUX_PERF_RBTREE_H diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 381aa948610d..570bb9794421 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -23,7 +23,7 @@ * rb_insert_augmented() and rb_erase_augmented() are intended to be public. * The rest are implementation details you are not expected to depend on. * - * See Documentation/rbtree.txt for documentation and samples. + * See Documentation/core-api/rbtree.rst for documentation and samples. */ struct rb_augment_callbacks { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2e29a671d67e..7bbf1b65be10 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1642,7 +1642,7 @@ union bpf_attr { * ifindex, but doesn't require a map to do so. * Return * **XDP_REDIRECT** on success, or the value of the two lower bits - * of the **flags* argument on error. + * of the *flags* argument on error. * * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 428c7dde6b4b..fdd632c833b4 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -116,7 +116,7 @@ struct kvm_irq_level { * ACPI gsi notion of irq. * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. - * For ARM: See Documentation/virt/kvm/api.txt + * For ARM: See Documentation/virt/kvm/api.rst */ union { __u32 irq; @@ -1107,7 +1107,7 @@ struct kvm_xen_hvm_config { * * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies * the irqfd to operate in resampling mode for level triggered interrupt - * emulation. See Documentation/virt/kvm/api.txt. + * emulation. See Documentation/virt/kvm/api.rst. */ #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index ad80a5c885d5..d1192783139a 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -148,9 +148,18 @@ struct statx { #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ #define STATX_BTIME 0x00000800U /* Want/got stx_btime */ -#define STATX_ALL 0x00000fffU /* All currently supported flags */ + #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ +#ifndef __KERNEL__ +/* + * This is deprecated, and shall remain the same value in the future. To avoid + * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME) + * instead. + */ +#define STATX_ALL 0x00000fffU +#endif + /* * Attributes to be found in stx_attributes and masked in stx_attributes_mask. * diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 027b18f7ed8c..82f53d81a7a7 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -90,6 +90,7 @@ struct fs { const char * const *mounts; char path[PATH_MAX]; bool found; + bool checked; long magic; }; @@ -111,31 +112,37 @@ static struct fs fs__entries[] = { .name = "sysfs", .mounts = sysfs__fs_known_mountpoints, .magic = SYSFS_MAGIC, + .checked = false, }, [FS__PROCFS] = { .name = "proc", .mounts = procfs__known_mountpoints, .magic = PROC_SUPER_MAGIC, + .checked = false, }, [FS__DEBUGFS] = { .name = "debugfs", .mounts = debugfs__known_mountpoints, .magic = DEBUGFS_MAGIC, + .checked = false, }, [FS__TRACEFS] = { .name = "tracefs", .mounts = tracefs__known_mountpoints, .magic = TRACEFS_MAGIC, + .checked = false, }, [FS__HUGETLBFS] = { .name = "hugetlbfs", .mounts = hugetlbfs__known_mountpoints, .magic = HUGETLBFS_MAGIC, + .checked = false, }, [FS__BPF_FS] = { .name = "bpf", .mounts = bpf_fs__known_mountpoints, .magic = BPF_FS_MAGIC, + .checked = false, }, }; @@ -158,6 +165,7 @@ static bool fs__read_mounts(struct fs *fs) } fclose(fp); + fs->checked = true; return fs->found = found; } @@ -220,6 +228,7 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; + fs->checked = true; strncpy(fs->path, override_path, sizeof(fs->path) - 1); fs->path[sizeof(fs->path) - 1] = '\0'; return true; @@ -246,6 +255,14 @@ static const char *fs__mountpoint(int idx) if (fs->found) return (const char *)fs->path; + /* the mount point was already checked for the mount point + * but and did not exist, so return NULL to avoid scanning again. + * This makes the found and not found paths cost equivalent + * in case of multiple calls. + */ + if (fs->checked) + return NULL; + return fs__get_mountpoint(fs); } diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index 936edb95e1f3..aa222ca30311 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -18,6 +18,18 @@ const char *name##__mount(void); \ bool name##__configured(void); \ +/* + * The xxxx__mountpoint() entry points find the first match mount point for each + * filesystems listed below, where xxxx is the filesystem type. + * + * The interface is as follows: + * + * - If a mount point is found on first call, it is cached and used for all + * subsequent calls. + * + * - If a mount point is not found, NULL is returned on first call and all + * subsequent calls. + */ FS(sysfs) FS(procfs) FS(debugfs) diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h new file mode 100644 index 000000000000..777c20f6b604 --- /dev/null +++ b/tools/lib/api/io.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Lightweight buffered reading library. + * + * Copyright 2019 Google LLC. + */ +#ifndef __API_IO__ +#define __API_IO__ + +#include <stdlib.h> +#include <unistd.h> + +struct io { + /* File descriptor being read/ */ + int fd; + /* Size of the read buffer. */ + unsigned int buf_len; + /* Pointer to storage for buffering read. */ + char *buf; + /* End of the storage. */ + char *end; + /* Currently accessed data pointer. */ + char *data; + /* Set true on when the end of file on read error. */ + bool eof; +}; + +static inline void io__init(struct io *io, int fd, + char *buf, unsigned int buf_len) +{ + io->fd = fd; + io->buf_len = buf_len; + io->buf = buf; + io->end = buf; + io->data = buf; + io->eof = false; +} + +/* Reads one character from the "io" file with similar semantics to fgetc. */ +static inline int io__get_char(struct io *io) +{ + char *ptr = io->data; + + if (io->eof) + return -1; + + if (ptr == io->end) { + ssize_t n = read(io->fd, io->buf, io->buf_len); + + if (n <= 0) { + io->eof = true; + return -1; + } + ptr = &io->buf[0]; + io->end = &io->buf[n]; + } + io->data = ptr + 1; + return *ptr; +} + +/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the + * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise + * returns the character after the hexadecimal value which may be -1 for eof. + * If the read value is larger than a u64 the high-order bits will be dropped. + */ +static inline int io__get_hex(struct io *io, __u64 *hex) +{ + bool first_read = true; + + *hex = 0; + while (true) { + int ch = io__get_char(io); + + if (ch < 0) + return ch; + if (ch >= '0' && ch <= '9') + *hex = (*hex << 4) | (ch - '0'); + else if (ch >= 'a' && ch <= 'f') + *hex = (*hex << 4) | (ch - 'a' + 10); + else if (ch >= 'A' && ch <= 'F') + *hex = (*hex << 4) | (ch - 'A' + 10); + else if (first_read) + return -2; + else + return ch; + first_read = false; + } +} + +/* Read a positive decimal value with out argument dec. If the first character + * isn't a decimal returns -2, io->eof returns -1, otherwise returns the + * character after the decimal value which may be -1 for eof. If the read value + * is larger than a u64 the high-order bits will be dropped. + */ +static inline int io__get_dec(struct io *io, __u64 *dec) +{ + bool first_read = true; + + *dec = 0; + while (true) { + int ch = io__get_char(io); + + if (ch < 0) + return ch; + if (ch >= '0' && ch <= '9') + *dec = (*dec * 10) + ch - '0'; + else if (first_read) + return -2; + else + return ch; + first_read = false; + } +} + +#endif /* __API_IO__ */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index f3f3c3fb98cb..48a9c7c69ef1 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -148,11 +148,11 @@ struct pt_regs; #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), grps[14]) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) #define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), pdw.addr) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) #elif defined(bpf_target_arm) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 0b709fd10bba..312f887570b2 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -321,6 +321,8 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) { + flags &= XDP_FLAGS_MODES; + if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) return info->prog_id; if (flags & XDP_FLAGS_DRV_MODE) diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index f93f4e703e4c..ca0215047c32 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -247,7 +247,7 @@ out: int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { - if (idx < cpus->nr) + if (cpus && idx < cpus->nr) return cpus->map[idx]; return -1; diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 5b9f2ca50591..6a875a0f01bb 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -11,10 +11,8 @@ #include <internal/mmap.h> #include <internal/cpumap.h> #include <internal/threadmap.h> -#include <internal/xyarray.h> #include <internal/lib.h> #include <linux/zalloc.h> -#include <sys/ioctl.h> #include <stdlib.h> #include <errno.h> #include <unistd.h> @@ -125,8 +123,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { perf_cpu_map__put(evlist->cpus); + perf_cpu_map__put(evlist->all_cpus); perf_thread_map__put(evlist->threads); evlist->cpus = NULL; + evlist->all_cpus = NULL; evlist->threads = NULL; fdarray__exit(&evlist->pollfd); } diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index af9def589863..d2414144eb8c 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -151,6 +151,8 @@ struct option { { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) } +#define OPT_CALLBACK_SET(s, l, v, os, a, h, f) \ + { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .set = check_vtype(os, bool *)} #define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } #define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c index 1a7a9f877095..e335ac2b9e19 100644 --- a/tools/lib/symbol/kallsyms.c +++ b/tools/lib/symbol/kallsyms.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "symbol/kallsyms.h" +#include "api/io.h" #include <stdio.h> -#include <stdlib.h> +#include <sys/stat.h> +#include <fcntl.h> u8 kallsyms2elf_type(char type) { @@ -15,74 +17,62 @@ bool kallsyms__is_function(char symbol_type) return symbol_type == 'T' || symbol_type == 'W'; } -/* - * While we find nice hex chars, build a long_val. - * Return number of chars processed. - */ -int hex2u64(const char *ptr, u64 *long_val) +static void read_to_eol(struct io *io) { - char *p; + int ch; - *long_val = strtoull(ptr, &p, 16); - - return p - ptr; + for (;;) { + ch = io__get_char(io); + if (ch < 0 || ch == '\n') + return; + } } int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, char type, u64 start)) { - char *line = NULL; - size_t n; - int err = -1; - FILE *file = fopen(filename, "r"); - - if (file == NULL) - goto out_failure; - - err = 0; + struct io io; + char bf[BUFSIZ]; + int err; - while (!feof(file)) { - u64 start; - int line_len, len; - char symbol_type; - char *symbol_name; + io.fd = open(filename, O_RDONLY, 0); - line_len = getline(&line, &n, file); - if (line_len < 0 || !line) - break; + if (io.fd < 0) + return -1; - line[--line_len] = '\0'; /* \n */ + io__init(&io, io.fd, bf, sizeof(bf)); - len = hex2u64(line, &start); + err = 0; + while (!io.eof) { + __u64 start; + int ch; + size_t i; + char symbol_type; + char symbol_name[KSYM_NAME_LEN + 1]; - /* Skip the line if we failed to parse the address. */ - if (!len) + if (io__get_hex(&io, &start) != ' ') { + read_to_eol(&io); continue; - - len++; - if (len + 2 >= line_len) + } + symbol_type = io__get_char(&io); + if (io__get_char(&io) != ' ') { + read_to_eol(&io); continue; - - symbol_type = line[len]; - len += 2; - symbol_name = line + len; - len = line_len - len; - - if (len >= KSYM_NAME_LEN) { - err = -1; - break; } + for (i = 0; i < sizeof(symbol_name); i++) { + ch = io__get_char(&io); + if (ch < 0 || ch == '\n') + break; + symbol_name[i] = ch; + } + symbol_name[i] = '\0'; err = process_symbol(arg, symbol_name, symbol_type, start); if (err) break; } - free(line); - fclose(file); + close(io.fd); return err; - -out_failure: - return -1; } diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h index bd988f7b18d4..72ab9870454b 100644 --- a/tools/lib/symbol/kallsyms.h +++ b/tools/lib/symbol/kallsyms.h @@ -18,8 +18,6 @@ static inline u8 kallsyms2elf_binding(char type) return isupper(type) ? STB_GLOBAL : STB_LOCAL; } -int hex2u64(const char *ptr, u64 *long_val); - u8 kallsyms2elf_type(char type); bool kallsyms__is_function(char symbol_type); diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index b887e7437d67..27f3b07fdae8 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -438,7 +438,7 @@ void *kbuffer_translate_data(int swap, void *data, unsigned int *size) case KBUFFER_TYPE_TIME_EXTEND: case KBUFFER_TYPE_TIME_STAMP: return NULL; - }; + } *size = length; diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 20eed719542e..c271aeeb227d 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1958,7 +1958,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a default: break; } - asprintf(&str, val ? "TRUE" : "FALSE"); + if (asprintf(&str, val ? "TRUE" : "FALSE") < 0) + str = NULL; break; } } @@ -1976,7 +1977,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a break; } - asprintf(&str, "(%s) %s (%s)", left, op, right); + if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0) + str = NULL; break; case TEP_FILTER_OP_NOT: @@ -1992,10 +1994,12 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a right_val = 0; if (right_val >= 0) { /* just return the opposite */ - asprintf(&str, right_val ? "FALSE" : "TRUE"); + if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0) + str = NULL; break; } - asprintf(&str, "%s(%s)", op, right); + if (asprintf(&str, "%s(%s)", op, right) < 0) + str = NULL; break; default: @@ -2011,7 +2015,8 @@ static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg * { char *str = NULL; - asprintf(&str, "%lld", arg->value.val); + if (asprintf(&str, "%lld", arg->value.val) < 0) + str = NULL; return str; } @@ -2069,7 +2074,8 @@ static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg * break; } - asprintf(&str, "%s %s %s", lstr, op, rstr); + if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0) + str = NULL; out: free(lstr); free(rstr); @@ -2113,7 +2119,8 @@ static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg * if (!op) op = "<="; - asprintf(&str, "%s %s %s", lstr, op, rstr); + if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0) + str = NULL; break; default: @@ -2148,8 +2155,9 @@ static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg * if (!op) op = "!~"; - asprintf(&str, "%s %s \"%s\"", - arg->str.field->name, op, arg->str.val); + if (asprintf(&str, "%s %s \"%s\"", + arg->str.field->name, op, arg->str.val) < 0) + str = NULL; break; default: @@ -2165,7 +2173,8 @@ static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg * switch (arg->type) { case TEP_FILTER_ARG_BOOLEAN: - asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE"); + if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0) + str = NULL; return str; case TEP_FILTER_ARG_OP: diff --git a/tools/objtool/Build b/tools/objtool/Build index 66f44f5cd2a6..b7222d5cc7bc 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -1,11 +1,16 @@ objtool-y += arch/$(SRCARCH)/ + +objtool-y += weak.o + +objtool-$(SUBCMD_CHECK) += check.o +objtool-$(SUBCMD_CHECK) += special.o +objtool-$(SUBCMD_ORC) += check.o +objtool-$(SUBCMD_ORC) += orc_gen.o +objtool-$(SUBCMD_ORC) += orc_dump.o + objtool-y += builtin-check.o objtool-y += builtin-orc.o -objtool-y += check.o -objtool-y += orc_gen.o -objtool-y += orc_dump.o objtool-y += elf.o -objtool-y += special.o objtool-y += objtool.o objtool-y += libstring.o diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index de094670050b..0542e46c7552 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -289,6 +289,47 @@ they mean, and suggestions for how to fix them. might be corrupt due to a gcc bug. For more details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 +9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled + + This means that an unexpected call to a non-whitelisted function exists + outside of arch-specific guards. + X86: SMAP (stac/clac): __uaccess_begin()/__uaccess_end() + ARM: PAN: uaccess_enable()/uaccess_disable() + + These functions should be called to denote a minimal critical section around + access to __user variables. See also: https://lwn.net/Articles/517475/ + + The intention of the warning is to prevent calls to funcB() from eventually + calling schedule(), potentially leaking the AC flags state, and not + restoring them correctly. + + It also helps verify that there are no unexpected calls to funcB() which may + access user space pages with protections against doing so disabled. + + To fix, either: + 1) remove explicit calls to funcB() from funcA(). + 2) add the correct guards before and after calls to low level functions like + __get_user_size()/__put_user_size(). + 3) add funcB to uaccess_safe_builtin whitelist in tools/objtool/check.c, if + funcB obviously does not call schedule(), and is marked notrace (since + function tracing inserts additional calls, which is not obvious from the + sources). + +10. file.o: warning: func()+0x5c: alternative modifies stack + + This means that an alternative includes instructions that modify the + stack. The problem is that there is only one ORC unwind table, this means + that the ORC unwind entries must be valid for each of the alternatives. + The easiest way to enforce this is to ensure alternatives do not contain + any ORC entries, which in turn implies the above constraint. + +11. file.o: warning: unannotated intra-function call + + This warning means that a direct call is done to a destination which + is not at the beginning of a function. If this is a legit call, you + can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL + directive right before the call. + If the error doesn't seem to make sense, it could be a bug in objtool. Feel free to ask the objtool maintainer for help. diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index f591c4d1b6fe..7770edcda3a0 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -35,7 +35,8 @@ all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ - -I$(srctree)/tools/arch/$(SRCARCH)/include + -I$(srctree)/tools/arch/$(SRCARCH)/include \ + -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) @@ -45,14 +46,24 @@ elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED) AWK = awk + +SUBCMD_CHECK := n +SUBCMD_ORC := n + +ifeq ($(SRCARCH),x86) + SUBCMD_CHECK := y + SUBCMD_ORC := y +endif + +export SUBCMD_CHECK SUBCMD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include $(OBJTOOL_IN): fixdep FORCE + @$(CONFIG_SHELL) ./sync-check.sh @$(MAKE) $(build)=objtool $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN) - @$(CONFIG_SHELL) ./sync-check.sh $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@ diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index ced3765c4f44..eda15a5a285e 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -8,9 +8,11 @@ #include <stdbool.h> #include <linux/list.h> -#include "elf.h" +#include "objtool.h" #include "cfi.h" +#include <asm/orc_types.h> + enum insn_type { INSN_JUMP_CONDITIONAL, INSN_JUMP_UNCONDITIONAL, @@ -20,7 +22,6 @@ enum insn_type { INSN_CALL_DYNAMIC, INSN_RETURN, INSN_CONTEXT_SWITCH, - INSN_STACK, INSN_BUG, INSN_NOP, INSN_STAC, @@ -64,15 +65,23 @@ struct op_src { struct stack_op { struct op_dest dest; struct op_src src; + struct list_head list; }; -void arch_initial_func_cfi_state(struct cfi_state *state); +struct instruction; + +void arch_initial_func_cfi_state(struct cfi_init_state *state); -int arch_decode_instruction(struct elf *elf, struct section *sec, +int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, - unsigned long *immediate, struct stack_op *op); + unsigned long *immediate, + struct list_head *ops_list); bool arch_callee_saved_reg(unsigned char reg); +unsigned long arch_jump_destination(struct instruction *insn); + +unsigned long arch_dest_rela_offset(int addend); + #endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index a62e032863a8..4b504fc90bbb 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -11,6 +11,7 @@ #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" +#include "../../check.h" #include "../../elf.h" #include "../../arch.h" #include "../../warn.h" @@ -26,7 +27,7 @@ static unsigned char op_to_cfi_reg[][2] = { {CFI_DI, CFI_R15}, }; -static int is_x86_64(struct elf *elf) +static int is_x86_64(const struct elf *elf) { switch (elf->ehdr.e_machine) { case EM_X86_64: @@ -66,16 +67,34 @@ bool arch_callee_saved_reg(unsigned char reg) } } -int arch_decode_instruction(struct elf *elf, struct section *sec, +unsigned long arch_dest_rela_offset(int addend) +{ + return addend + 4; +} + +unsigned long arch_jump_destination(struct instruction *insn) +{ + return insn->offset + insn->len + insn->immediate; +} + +#define ADD_OP(op) \ + if (!(op = calloc(1, sizeof(*op)))) \ + return -1; \ + else for (list_add_tail(&op->list, ops_list); op; op = NULL) + +int arch_decode_instruction(const struct elf *elf, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, - unsigned long *immediate, struct stack_op *op) + unsigned long *immediate, + struct list_head *ops_list) { struct insn insn; int x86_64, sign; unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0; + struct stack_op *op = NULL; + struct symbol *sym; x86_64 = is_x86_64(elf); if (x86_64 == -1) @@ -85,7 +104,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, insn_get_length(&insn); if (!insn_complete(&insn)) { - WARN_FUNC("can't decode instruction", sec, offset); + WARN("can't decode instruction at %s:0x%lx", sec->name, offset); return -1; } @@ -123,40 +142,44 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { /* add/sub reg, %rsp */ - *type = INSN_STACK; - op->src.type = OP_SRC_ADD; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } } break; case 0x50 ... 0x57: /* push reg */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; - op->dest.type = OP_DEST_PUSH; + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + op->dest.type = OP_DEST_PUSH; + } break; case 0x58 ... 0x5f: /* pop reg */ - *type = INSN_STACK; - op->src.type = OP_SRC_POP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b]; + } break; case 0x68: case 0x6a: /* push immediate */ - *type = INSN_STACK; - op->src.type = OP_SRC_CONST; - op->dest.type = OP_DEST_PUSH; + ADD_OP(op) { + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + } break; case 0x70 ... 0x7f: @@ -170,12 +193,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, if (modrm == 0xe4) { /* and imm, %rsp */ - *type = INSN_STACK; - op->src.type = OP_SRC_AND; - op->src.reg = CFI_SP; - op->src.offset = insn.immediate.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_AND; + op->src.reg = CFI_SP; + op->src.offset = insn.immediate.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } break; } @@ -187,34 +211,37 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, break; /* add/sub imm, %rsp */ - *type = INSN_STACK; - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_SP; - op->src.offset = insn.immediate.value * sign; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = insn.immediate.value * sign; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } break; case 0x89: if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { /* mov %rsp, reg */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + } break; } if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) { /* mov reg, %rsp */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } break; } @@ -224,22 +251,24 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) { /* mov reg, disp(%rbp) */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_DEST_REG_INDIRECT; - op->dest.reg = CFI_BP; - op->dest.offset = insn.displacement.value; + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = CFI_BP; + op->dest.offset = insn.displacement.value; + } } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { /* mov reg, disp(%rsp) */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG; - op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; - op->dest.type = OP_DEST_REG_INDIRECT; - op->dest.reg = CFI_SP; - op->dest.offset = insn.displacement.value; + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = op_to_cfi_reg[modrm_reg][rex_r]; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = CFI_SP; + op->dest.offset = insn.displacement.value; + } } break; @@ -248,23 +277,25 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) { /* mov disp(%rbp), reg */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG_INDIRECT; - op->src.reg = CFI_BP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + ADD_OP(op) { + op->src.type = OP_SRC_REG_INDIRECT; + op->src.reg = CFI_BP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + } } else if (rex_w && !rex_b && sib == 0x24 && modrm_mod != 3 && modrm_rm == 4) { /* mov disp(%rsp), reg */ - *type = INSN_STACK; - op->src.type = OP_SRC_REG_INDIRECT; - op->src.reg = CFI_SP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + ADD_OP(op) { + op->src.type = OP_SRC_REG_INDIRECT; + op->src.reg = CFI_SP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; + } } break; @@ -272,28 +303,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, case 0x8d: if (sib == 0x24 && rex_w && !rex_b && !rex_x) { - *type = INSN_STACK; - if (!insn.displacement.value) { - /* lea (%rsp), reg */ - op->src.type = OP_SRC_REG; - } else { - /* lea disp(%rsp), reg */ - op->src.type = OP_SRC_ADD; - op->src.offset = insn.displacement.value; + ADD_OP(op) { + if (!insn.displacement.value) { + /* lea (%rsp), reg */ + op->src.type = OP_SRC_REG; + } else { + /* lea disp(%rsp), reg */ + op->src.type = OP_SRC_ADD; + op->src.offset = insn.displacement.value; + } + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; } - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; } else if (rex == 0x48 && modrm == 0x65) { /* lea disp(%rbp), %rsp */ - *type = INSN_STACK; - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_BP; - op->src.offset = insn.displacement.value; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_BP; + op->src.offset = insn.displacement.value; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } } else if (rex == 0x49 && modrm == 0x62 && insn.displacement.value == -8) { @@ -304,12 +337,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, * Restoring rsp back to its original value after a * stack realignment. */ - *type = INSN_STACK; - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R10; - op->src.offset = -8; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_R10; + op->src.offset = -8; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } } else if (rex == 0x49 && modrm == 0x65 && insn.displacement.value == -16) { @@ -320,21 +354,23 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, * Restoring rsp back to its original value after a * stack realignment. */ - *type = INSN_STACK; - op->src.type = OP_SRC_ADD; - op->src.reg = CFI_R13; - op->src.offset = -16; - op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_SP; + ADD_OP(op) { + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_R13; + op->src.offset = -16; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } } break; case 0x8f: /* pop to mem */ - *type = INSN_STACK; - op->src.type = OP_SRC_POP; - op->dest.type = OP_DEST_MEM; + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_MEM; + } break; case 0x90: @@ -343,16 +379,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, case 0x9c: /* pushf */ - *type = INSN_STACK; - op->src.type = OP_SRC_CONST; - op->dest.type = OP_DEST_PUSHF; + ADD_OP(op) { + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSHF; + } break; case 0x9d: /* popf */ - *type = INSN_STACK; - op->src.type = OP_SRC_POPF; - op->dest.type = OP_DEST_MEM; + ADD_OP(op) { + op->src.type = OP_SRC_POPF; + op->dest.type = OP_DEST_MEM; + } break; case 0x0f: @@ -387,16 +425,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, } else if (op2 == 0xa0 || op2 == 0xa8) { /* push fs/gs */ - *type = INSN_STACK; - op->src.type = OP_SRC_CONST; - op->dest.type = OP_DEST_PUSH; + ADD_OP(op) { + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + } } else if (op2 == 0xa1 || op2 == 0xa9) { /* pop fs/gs */ - *type = INSN_STACK; - op->src.type = OP_SRC_POP; - op->dest.type = OP_DEST_MEM; + ADD_OP(op) { + op->src.type = OP_SRC_POP; + op->dest.type = OP_DEST_MEM; + } } break; @@ -409,8 +449,8 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, * mov bp, sp * pop bp */ - *type = INSN_STACK; - op->dest.type = OP_DEST_LEAVE; + ADD_OP(op) + op->dest.type = OP_DEST_LEAVE; break; @@ -429,14 +469,41 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, *type = INSN_RETURN; break; + case 0xcf: /* iret */ + /* + * Handle sync_core(), which has an IRET to self. + * All other IRET are in STT_NONE entry code. + */ + sym = find_symbol_containing(sec, offset); + if (sym && sym->type == STT_FUNC) { + ADD_OP(op) { + /* add $40, %rsp */ + op->src.type = OP_SRC_ADD; + op->src.reg = CFI_SP; + op->src.offset = 5*8; + op->dest.type = OP_DEST_REG; + op->dest.reg = CFI_SP; + } + break; + } + + /* fallthrough */ + case 0xca: /* retf */ case 0xcb: /* retf */ - case 0xcf: /* iret */ *type = INSN_CONTEXT_SWITCH; break; case 0xe8: *type = INSN_CALL; + /* + * For the impact on the stack, a CALL behaves like + * a PUSH of an immediate value (the return address). + */ + ADD_OP(op) { + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + } break; case 0xfc: @@ -464,9 +531,10 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, else if (modrm_reg == 6) { /* push from mem */ - *type = INSN_STACK; - op->src.type = OP_SRC_CONST; - op->dest.type = OP_DEST_PUSH; + ADD_OP(op) { + op->src.type = OP_SRC_CONST; + op->dest.type = OP_DEST_PUSH; + } } break; @@ -480,7 +548,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, return 0; } -void arch_initial_func_cfi_state(struct cfi_state *state) +void arch_initial_func_cfi_state(struct cfi_init_state *state) { int i; diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/cfi_regs.h new file mode 100644 index 000000000000..79bc517efba8 --- /dev/null +++ b/tools/objtool/arch/x86/include/cfi_regs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_CFI_REGS_H +#define _OBJTOOL_CFI_REGS_H + +#define CFI_AX 0 +#define CFI_DX 1 +#define CFI_CX 2 +#define CFI_BX 3 +#define CFI_SI 4 +#define CFI_DI 5 +#define CFI_BP 6 +#define CFI_SP 7 +#define CFI_R8 8 +#define CFI_R9 9 +#define CFI_R10 10 +#define CFI_R11 11 +#define CFI_R12 12 +#define CFI_R13 13 +#define CFI_R14 14 +#define CFI_R15 15 +#define CFI_RA 16 +#define CFI_NUM_REGS 17 + +#endif /* _OBJTOOL_CFI_REGS_H */ diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 10fbe75ab43d..7a44174967b5 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -14,10 +14,11 @@ */ #include <subcmd/parse-options.h> +#include <string.h> #include "builtin.h" -#include "check.h" +#include "objtool.h" -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; static const char * const check_usage[] = { "objtool check [<options>] file.o", @@ -32,12 +33,14 @@ const struct option check_options[] = { OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), OPT_BOOLEAN('s', "stats", &stats, "print statistics"), + OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"), + OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_END(), }; int cmd_check(int argc, const char **argv) { - const char *objname; + const char *objname, *s; argc = parse_options(argc, argv, check_options, check_usage, 0); @@ -46,5 +49,9 @@ int cmd_check(int argc, const char **argv) objname = argv[0]; + s = strstr(objname, "vmlinux.o"); + if (s && !s[9]) + vmlinux = true; + return check(objname, false); } diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 5f7cc6157edd..b1dfe2007962 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -14,8 +14,7 @@ #include <string.h> #include "builtin.h" -#include "check.h" - +#include "objtool.h" static const char *orc_usage[] = { "objtool orc generate [<options>] file.o", diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 0b907902ee79..85c979caa367 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -8,7 +8,7 @@ #include <subcmd/parse-options.h> extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h index 4427bf8ed686..c7c59c6a44ee 100644 --- a/tools/objtool/cfi.h +++ b/tools/objtool/cfi.h @@ -6,38 +6,33 @@ #ifndef _OBJTOOL_CFI_H #define _OBJTOOL_CFI_H +#include "cfi_regs.h" + #define CFI_UNDEFINED -1 #define CFI_CFA -2 #define CFI_SP_INDIRECT -3 #define CFI_BP_INDIRECT -4 -#define CFI_AX 0 -#define CFI_DX 1 -#define CFI_CX 2 -#define CFI_BX 3 -#define CFI_SI 4 -#define CFI_DI 5 -#define CFI_BP 6 -#define CFI_SP 7 -#define CFI_R8 8 -#define CFI_R9 9 -#define CFI_R10 10 -#define CFI_R11 11 -#define CFI_R12 12 -#define CFI_R13 13 -#define CFI_R14 14 -#define CFI_R15 15 -#define CFI_RA 16 -#define CFI_NUM_REGS 17 - struct cfi_reg { int base; int offset; }; -struct cfi_state { +struct cfi_init_state { + struct cfi_reg regs[CFI_NUM_REGS]; struct cfi_reg cfa; +}; + +struct cfi_state { struct cfi_reg regs[CFI_NUM_REGS]; + struct cfi_reg vals[CFI_NUM_REGS]; + struct cfi_reg cfa; + int stack_size; + int drap_reg, drap_offset; + unsigned char type; + bool bp_scratch; + bool drap; + bool end; }; #endif /* _OBJTOOL_CFI_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4b170fd08a28..63d65a702900 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -7,10 +7,10 @@ #include <stdlib.h> #include "builtin.h" +#include "cfi.h" +#include "arch.h" #include "check.h" -#include "elf.h" #include "special.h" -#include "arch.h" #include "warn.h" #include <linux/hashtable.h> @@ -27,16 +27,17 @@ struct alternative { }; const char *objname; -struct cfi_state initial_func_cfi; +struct cfi_init_state initial_func_cfi; struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset) { struct instruction *insn; - hash_for_each_possible(file->insn_hash, insn, hash, offset) + hash_for_each_possible(file->insn_hash, insn, hash, sec_offset_hash(sec, offset)) { if (insn->sec == sec && insn->offset == offset) return insn; + } return NULL; } @@ -72,6 +73,17 @@ static struct instruction *next_insn_same_func(struct objtool_file *file, return find_insn(file, func->cfunc->sec, func->cfunc->offset); } +static struct instruction *prev_insn_same_sym(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *prev = list_prev_entry(insn, list); + + if (&prev->list != &file->insn_list && prev->func == insn->func) + return prev; + + return NULL; +} + #define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn; \ @@ -215,18 +227,31 @@ static bool dead_end_function(struct objtool_file *file, struct symbol *func) return __dead_end_function(file, func, 0); } -static void clear_insn_state(struct insn_state *state) +static void init_cfi_state(struct cfi_state *cfi) { int i; - memset(state, 0, sizeof(*state)); - state->cfa.base = CFI_UNDEFINED; for (i = 0; i < CFI_NUM_REGS; i++) { - state->regs[i].base = CFI_UNDEFINED; - state->vals[i].base = CFI_UNDEFINED; + cfi->regs[i].base = CFI_UNDEFINED; + cfi->vals[i].base = CFI_UNDEFINED; } - state->drap_reg = CFI_UNDEFINED; - state->drap_offset = -1; + cfi->cfa.base = CFI_UNDEFINED; + cfi->drap_reg = CFI_UNDEFINED; + cfi->drap_offset = -1; +} + +static void init_insn_state(struct insn_state *state, struct section *sec) +{ + memset(state, 0, sizeof(*state)); + init_cfi_state(&state->cfi); + + /* + * We need the full vmlinux for noinstr validation, otherwise we can + * not correctly determine insn->call_dest->sec (external symbols do + * not have a section). + */ + if (vmlinux && sec) + state->noinstr = sec->noinstr; } /* @@ -252,6 +277,10 @@ static int decode_instructions(struct objtool_file *file) strncmp(sec->name, ".discard.", 9)) sec->text = true; + if (!strcmp(sec->name, ".noinstr.text") || + !strcmp(sec->name, ".entry.text")) + sec->noinstr = true; + for (offset = 0; offset < sec->len; offset += insn->len) { insn = malloc(sizeof(*insn)); if (!insn) { @@ -260,7 +289,8 @@ static int decode_instructions(struct objtool_file *file) } memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); - clear_insn_state(&insn->state); + INIT_LIST_HEAD(&insn->stack_ops); + init_cfi_state(&insn->cfi); insn->sec = sec; insn->offset = offset; @@ -269,11 +299,11 @@ static int decode_instructions(struct objtool_file *file) sec->len - offset, &insn->len, &insn->type, &insn->immediate, - &insn->stack_op); + &insn->stack_ops); if (ret) goto err; - hash_add(file->insn_hash, &insn->hash, insn->offset); + hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset)); list_add_tail(&insn->list, &file->insn_list); nr_insns++; } @@ -303,6 +333,19 @@ err: return ret; } +static struct instruction *find_last_insn(struct objtool_file *file, + struct section *sec) +{ + struct instruction *insn = NULL; + unsigned int offset; + unsigned int end = (sec->len > 10) ? sec->len - 10 : 0; + + for (offset = sec->len - 1; offset >= end && !insn; offset--) + insn = find_insn(file, sec, offset); + + return insn; +} + /* * Mark "ud2" instructions and manually annotated dead ends. */ @@ -311,7 +354,6 @@ static int add_dead_ends(struct objtool_file *file) struct section *sec; struct rela *rela; struct instruction *insn; - bool found; /* * By default, "ud2" is a dead end unless otherwise annotated, because @@ -337,15 +379,8 @@ static int add_dead_ends(struct objtool_file *file) if (insn) insn = list_prev_entry(insn, list); else if (rela->addend == rela->sym->sec->len) { - found = false; - list_for_each_entry_reverse(insn, &file->insn_list, list) { - if (insn->sec == rela->sym->sec) { - found = true; - break; - } - } - - if (!found) { + insn = find_last_insn(file, rela->sym->sec); + if (!insn) { WARN("can't find unreachable insn at %s+0x%x", rela->sym->sec->name, rela->addend); return -1; @@ -379,15 +414,8 @@ reachable: if (insn) insn = list_prev_entry(insn, list); else if (rela->addend == rela->sym->sec->len) { - found = false; - list_for_each_entry_reverse(insn, &file->insn_list, list) { - if (insn->sec == rela->sym->sec) { - found = true; - break; - } - } - - if (!found) { + insn = find_last_insn(file, rela->sym->sec); + if (!insn) { WARN("can't find reachable insn at %s+0x%x", rela->sym->sec->name, rela->addend); return -1; @@ -479,6 +507,7 @@ static const char *uaccess_safe_builtin[] = { "__asan_report_store16_noabort", /* KCOV */ "write_comp_data", + "check_kcov_mode", "__sanitizer_cov_trace_pc", "__sanitizer_cov_trace_const_cmp1", "__sanitizer_cov_trace_const_cmp2", @@ -574,13 +603,14 @@ static int add_jump_destinations(struct objtool_file *file) insn->offset, insn->len); if (!rela) { dest_sec = insn->sec; - dest_off = insn->offset + insn->len + insn->immediate; + dest_off = arch_jump_destination(insn); } else if (rela->sym->type == STT_SECTION) { dest_sec = rela->sym->sec; - dest_off = rela->addend + 4; + dest_off = arch_dest_rela_offset(rela->addend); } else if (rela->sym->sec->idx) { dest_sec = rela->sym->sec; - dest_off = rela->sym->sym.st_value + rela->addend + 4; + dest_off = rela->sym->sym.st_value + + arch_dest_rela_offset(rela->addend); } else if (strstr(rela->sym->name, "_indirect_thunk_")) { /* * Retpoline jumps are really dynamic jumps in @@ -654,6 +684,16 @@ static int add_jump_destinations(struct objtool_file *file) return 0; } +static void remove_insn_ops(struct instruction *insn) +{ + struct stack_op *op, *tmp; + + list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) { + list_del(&op->list); + free(op); + } +} + /* * Find the destination instructions for all calls. */ @@ -670,7 +710,7 @@ static int add_call_destinations(struct objtool_file *file) rela = find_rela_by_dest_range(file->elf, insn->sec, insn->offset, insn->len); if (!rela) { - dest_off = insn->offset + insn->len + insn->immediate; + dest_off = arch_jump_destination(insn); insn->call_dest = find_func_by_offset(insn->sec, dest_off); if (!insn->call_dest) insn->call_dest = find_symbol_by_offset(insn->sec, dest_off); @@ -679,10 +719,7 @@ static int add_call_destinations(struct objtool_file *file) continue; if (!insn->call_dest) { - WARN_FUNC("unsupported intra-function call", - insn->sec, insn->offset); - if (retpoline) - WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE."); + WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset); return -1; } @@ -693,17 +730,27 @@ static int add_call_destinations(struct objtool_file *file) } } else if (rela->sym->type == STT_SECTION) { + dest_off = arch_dest_rela_offset(rela->addend); insn->call_dest = find_func_by_offset(rela->sym->sec, - rela->addend+4); + dest_off); if (!insn->call_dest) { - WARN_FUNC("can't find call dest symbol at %s+0x%x", + WARN_FUNC("can't find call dest symbol at %s+0x%lx", insn->sec, insn->offset, rela->sym->sec->name, - rela->addend + 4); + dest_off); return -1; } } else insn->call_dest = rela->sym; + + /* + * Whatever stack impact regular CALLs have, should be undone + * by the RETURN of the called function. + * + * Annotated intra-function calls retain the stack_ops but + * are converted to JUMP, see read_intra_function_calls(). + */ + remove_insn_ops(insn); } return 0; @@ -731,7 +778,9 @@ static int handle_group_alt(struct objtool_file *file, struct instruction *orig_insn, struct instruction **new_insn) { + static unsigned int alt_group_next_index = 1; struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; + unsigned int alt_group = alt_group_next_index++; unsigned long dest_off; last_orig_insn = NULL; @@ -740,7 +789,7 @@ static int handle_group_alt(struct objtool_file *file, if (insn->offset >= special_alt->orig_off + special_alt->orig_len) break; - insn->alt_group = true; + insn->alt_group = alt_group; last_orig_insn = insn; } @@ -752,7 +801,8 @@ static int handle_group_alt(struct objtool_file *file, } memset(fake_jump, 0, sizeof(*fake_jump)); INIT_LIST_HEAD(&fake_jump->alts); - clear_insn_state(&fake_jump->state); + INIT_LIST_HEAD(&fake_jump->stack_ops); + init_cfi_state(&fake_jump->cfi); fake_jump->sec = special_alt->new_sec; fake_jump->offset = FAKE_JUMP_OFFSET; @@ -773,6 +823,7 @@ static int handle_group_alt(struct objtool_file *file, } last_new_insn = NULL; + alt_group = alt_group_next_index++; insn = *new_insn; sec_for_each_insn_from(file, insn) { if (insn->offset >= special_alt->new_off + special_alt->new_len) @@ -782,6 +833,7 @@ static int handle_group_alt(struct objtool_file *file, insn->ignore = orig_insn->ignore_alts; insn->func = orig_insn->func; + insn->alt_group = alt_group; /* * Since alternative replacement code is copy/pasted by the @@ -810,7 +862,7 @@ static int handle_group_alt(struct objtool_file *file, if (!insn->immediate) continue; - dest_off = insn->offset + insn->len + insn->immediate; + dest_off = arch_jump_destination(insn); if (dest_off == special_alt->new_off + special_alt->new_len) { if (!fake_jump) { WARN("%s: alternative jump to end of section", @@ -905,6 +957,12 @@ static int add_special_section_alts(struct objtool_file *file) } if (special_alt->group) { + if (!special_alt->orig_len) { + WARN_FUNC("empty alternative entry", + orig_insn->sec, orig_insn->offset); + continue; + } + ret = handle_group_alt(file, special_alt, orig_insn, &new_insn); if (ret) @@ -1050,8 +1108,8 @@ static struct rela *find_jump_table(struct objtool_file *file, * it. */ for (; - &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func; - insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { + insn && insn->func && insn->func->pfunc == func; + insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) { if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) break; @@ -1242,15 +1300,10 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfa = &insn->state.cfa; + cfa = &insn->cfi.cfa; - if (hint->type == UNWIND_HINT_TYPE_SAVE) { - insn->save = true; - continue; - - } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) { - insn->restore = true; - insn->hint = true; + if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { + insn->ret_offset = hint->sp_offset; continue; } @@ -1288,8 +1341,8 @@ static int read_unwind_hints(struct objtool_file *file) } cfa->offset = hint->sp_offset; - insn->state.type = hint->type; - insn->state.end = hint->end; + insn->cfi.type = hint->type; + insn->cfi.end = hint->end; } return 0; @@ -1330,6 +1383,104 @@ static int read_retpoline_hints(struct objtool_file *file) return 0; } +static int read_instr_hints(struct objtool_file *file) +{ + struct section *sec; + struct instruction *insn; + struct rela *rela; + + sec = find_section_by_name(file->elf, ".rela.discard.instr_end"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.instr_end entry"); + return -1; + } + + insn->instr--; + } + + sec = find_section_by_name(file->elf, ".rela.discard.instr_begin"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.instr_begin entry"); + return -1; + } + + insn->instr++; + } + + return 0; +} + +static int read_intra_function_calls(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + struct rela *rela; + + sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + unsigned long dest_off; + + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in %s", + sec->name); + return -1; + } + + insn = find_insn(file, rela->sym->sec, rela->addend); + if (!insn) { + WARN("bad .discard.intra_function_call entry"); + return -1; + } + + if (insn->type != INSN_CALL) { + WARN_FUNC("intra_function_call not a direct call", + insn->sec, insn->offset); + return -1; + } + + /* + * Treat intra-function CALLs as JMPs, but with a stack_op. + * See add_call_destinations(), which strips stack_ops from + * normal CALLs. + */ + insn->type = INSN_JUMP_UNCONDITIONAL; + + dest_off = insn->offset + insn->len + insn->immediate; + insn->jump_dest = find_insn(file, insn->sec, dest_off); + if (!insn->jump_dest) { + WARN_FUNC("can't find call dest at %s+0x%lx", + insn->sec, insn->offset, + insn->sec->name, dest_off); + return -1; + } + } + + return 0; +} + static void mark_rodata(struct objtool_file *file) { struct section *sec; @@ -1346,8 +1497,8 @@ static void mark_rodata(struct objtool_file *file) * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) || - !strcmp(sec->name, C_JUMP_TABLE_SECTION)) { + if (!strncmp(sec->name, ".rodata", 7) && + !strstr(sec->name, ".str1.")) { sec->rodata = true; found = true; } @@ -1385,6 +1536,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_intra_function_calls(file); + if (ret) + return ret; + ret = add_call_destinations(file); if (ret) return ret; @@ -1401,12 +1556,16 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = read_instr_hints(file); + if (ret) + return ret; + return 0; } static bool is_fentry_call(struct instruction *insn) { - if (insn->type == INSN_CALL && + if (insn->type == INSN_CALL && insn->call_dest && insn->call_dest->type == STT_NOTYPE && !strcmp(insn->call_dest->name, "__fentry__")) return true; @@ -1414,42 +1573,59 @@ static bool is_fentry_call(struct instruction *insn) return false; } -static bool has_modified_stack_frame(struct insn_state *state) +static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state) { + u8 ret_offset = insn->ret_offset; + struct cfi_state *cfi = &state->cfi; int i; - if (state->cfa.base != initial_func_cfi.cfa.base || - state->cfa.offset != initial_func_cfi.cfa.offset || - state->stack_size != initial_func_cfi.cfa.offset || - state->drap) + if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap) + return true; + + if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset) return true; - for (i = 0; i < CFI_NUM_REGS; i++) - if (state->regs[i].base != initial_func_cfi.regs[i].base || - state->regs[i].offset != initial_func_cfi.regs[i].offset) + if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset) + return true; + + /* + * If there is a ret offset hint then don't check registers + * because a callee-saved register might have been pushed on + * the stack. + */ + if (ret_offset) + return false; + + for (i = 0; i < CFI_NUM_REGS; i++) { + if (cfi->regs[i].base != initial_func_cfi.regs[i].base || + cfi->regs[i].offset != initial_func_cfi.regs[i].offset) return true; + } return false; } static bool has_valid_stack_frame(struct insn_state *state) { - if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA && - state->regs[CFI_BP].offset == -16) + struct cfi_state *cfi = &state->cfi; + + if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA && + cfi->regs[CFI_BP].offset == -16) return true; - if (state->drap && state->regs[CFI_BP].base == CFI_BP) + if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP) return true; return false; } -static int update_insn_state_regs(struct instruction *insn, struct insn_state *state) +static int update_cfi_state_regs(struct instruction *insn, + struct cfi_state *cfi, + struct stack_op *op) { - struct cfi_reg *cfa = &state->cfa; - struct stack_op *op = &insn->stack_op; + struct cfi_reg *cfa = &cfi->cfa; - if (cfa->base != CFI_SP) + if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) return 0; /* push */ @@ -1468,20 +1644,19 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s return 0; } -static void save_reg(struct insn_state *state, unsigned char reg, int base, - int offset) +static void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset) { if (arch_callee_saved_reg(reg) && - state->regs[reg].base == CFI_UNDEFINED) { - state->regs[reg].base = base; - state->regs[reg].offset = offset; + cfi->regs[reg].base == CFI_UNDEFINED) { + cfi->regs[reg].base = base; + cfi->regs[reg].offset = offset; } } -static void restore_reg(struct insn_state *state, unsigned char reg) +static void restore_reg(struct cfi_state *cfi, unsigned char reg) { - state->regs[reg].base = CFI_UNDEFINED; - state->regs[reg].offset = 0; + cfi->regs[reg].base = initial_func_cfi.regs[reg].base; + cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset; } /* @@ -1537,11 +1712,11 @@ static void restore_reg(struct insn_state *state, unsigned char reg) * 41 5d pop %r13 * c3 retq */ -static int update_insn_state(struct instruction *insn, struct insn_state *state) +static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, + struct stack_op *op) { - struct stack_op *op = &insn->stack_op; - struct cfi_reg *cfa = &state->cfa; - struct cfi_reg *regs = state->regs; + struct cfi_reg *cfa = &cfi->cfa; + struct cfi_reg *regs = cfi->regs; /* stack operations don't make sense with an undefined CFA */ if (cfa->base == CFI_UNDEFINED) { @@ -1552,8 +1727,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) return 0; } - if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET) - return update_insn_state_regs(insn, state); + if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET) + return update_cfi_state_regs(insn, cfi, op); switch (op->dest.type) { @@ -1568,16 +1743,16 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) /* mov %rsp, %rbp */ cfa->base = op->dest.reg; - state->bp_scratch = false; + cfi->bp_scratch = false; } else if (op->src.reg == CFI_SP && - op->dest.reg == CFI_BP && state->drap) { + op->dest.reg == CFI_BP && cfi->drap) { /* drap: mov %rsp, %rbp */ regs[CFI_BP].base = CFI_BP; - regs[CFI_BP].offset = -state->stack_size; - state->bp_scratch = false; + regs[CFI_BP].offset = -cfi->stack_size; + cfi->bp_scratch = false; } else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { @@ -1592,8 +1767,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) * ... * mov %rax, %rsp */ - state->vals[op->dest.reg].base = CFI_CFA; - state->vals[op->dest.reg].offset = -state->stack_size; + cfi->vals[op->dest.reg].base = CFI_CFA; + cfi->vals[op->dest.reg].offset = -cfi->stack_size; } else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP && @@ -1604,14 +1779,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) * * Restore the original stack pointer (Clang). */ - state->stack_size = -state->regs[CFI_BP].offset; + cfi->stack_size = -cfi->regs[CFI_BP].offset; } else if (op->dest.reg == cfa->base) { /* mov %reg, %rsp */ if (cfa->base == CFI_SP && - state->vals[op->src.reg].base == CFI_CFA) { + cfi->vals[op->src.reg].base == CFI_CFA) { /* * This is needed for the rare case @@ -1621,8 +1796,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) * ... * mov %rcx, %rsp */ - cfa->offset = -state->vals[op->src.reg].offset; - state->stack_size = cfa->offset; + cfa->offset = -cfi->vals[op->src.reg].offset; + cfi->stack_size = cfa->offset; } else { cfa->base = CFI_UNDEFINED; @@ -1636,7 +1811,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) { /* add imm, %rsp */ - state->stack_size -= op->src.offset; + cfi->stack_size -= op->src.offset; if (cfa->base == CFI_SP) cfa->offset -= op->src.offset; break; @@ -1645,14 +1820,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { /* lea disp(%rbp), %rsp */ - state->stack_size = -(op->src.offset + regs[CFI_BP].offset); + cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); break; } if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { /* drap: lea disp(%rsp), %drap */ - state->drap_reg = op->dest.reg; + cfi->drap_reg = op->dest.reg; /* * lea disp(%rsp), %reg @@ -1664,25 +1839,25 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) * ... * mov %rcx, %rsp */ - state->vals[op->dest.reg].base = CFI_CFA; - state->vals[op->dest.reg].offset = \ - -state->stack_size + op->src.offset; + cfi->vals[op->dest.reg].base = CFI_CFA; + cfi->vals[op->dest.reg].offset = \ + -cfi->stack_size + op->src.offset; break; } - if (state->drap && op->dest.reg == CFI_SP && - op->src.reg == state->drap_reg) { + if (cfi->drap && op->dest.reg == CFI_SP && + op->src.reg == cfi->drap_reg) { /* drap: lea disp(%drap), %rsp */ cfa->base = CFI_SP; - cfa->offset = state->stack_size = -op->src.offset; - state->drap_reg = CFI_UNDEFINED; - state->drap = false; + cfa->offset = cfi->stack_size = -op->src.offset; + cfi->drap_reg = CFI_UNDEFINED; + cfi->drap = false; break; } - if (op->dest.reg == state->cfa.base) { + if (op->dest.reg == cfi->cfa.base) { WARN_FUNC("unsupported stack register modification", insn->sec, insn->offset); return -1; @@ -1692,18 +1867,18 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) case OP_SRC_AND: if (op->dest.reg != CFI_SP || - (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || - (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { + (cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) || + (cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) { WARN_FUNC("unsupported stack pointer realignment", insn->sec, insn->offset); return -1; } - if (state->drap_reg != CFI_UNDEFINED) { + if (cfi->drap_reg != CFI_UNDEFINED) { /* drap: and imm, %rsp */ - cfa->base = state->drap_reg; - cfa->offset = state->stack_size = 0; - state->drap = true; + cfa->base = cfi->drap_reg; + cfa->offset = cfi->stack_size = 0; + cfi->drap = true; } /* @@ -1715,57 +1890,55 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) case OP_SRC_POP: case OP_SRC_POPF: - if (!state->drap && op->dest.type == OP_DEST_REG && - op->dest.reg == cfa->base) { + if (!cfi->drap && op->dest.reg == cfa->base) { /* pop %rbp */ cfa->base = CFI_SP; } - if (state->drap && cfa->base == CFI_BP_INDIRECT && - op->dest.type == OP_DEST_REG && - op->dest.reg == state->drap_reg && - state->drap_offset == -state->stack_size) { + if (cfi->drap && cfa->base == CFI_BP_INDIRECT && + op->dest.reg == cfi->drap_reg && + cfi->drap_offset == -cfi->stack_size) { /* drap: pop %drap */ - cfa->base = state->drap_reg; + cfa->base = cfi->drap_reg; cfa->offset = 0; - state->drap_offset = -1; + cfi->drap_offset = -1; - } else if (regs[op->dest.reg].offset == -state->stack_size) { + } else if (regs[op->dest.reg].offset == -cfi->stack_size) { /* pop %reg */ - restore_reg(state, op->dest.reg); + restore_reg(cfi, op->dest.reg); } - state->stack_size -= 8; + cfi->stack_size -= 8; if (cfa->base == CFI_SP) cfa->offset -= 8; break; case OP_SRC_REG_INDIRECT: - if (state->drap && op->src.reg == CFI_BP && - op->src.offset == state->drap_offset) { + if (cfi->drap && op->src.reg == CFI_BP && + op->src.offset == cfi->drap_offset) { /* drap: mov disp(%rbp), %drap */ - cfa->base = state->drap_reg; + cfa->base = cfi->drap_reg; cfa->offset = 0; - state->drap_offset = -1; + cfi->drap_offset = -1; } - if (state->drap && op->src.reg == CFI_BP && + if (cfi->drap && op->src.reg == CFI_BP && op->src.offset == regs[op->dest.reg].offset) { /* drap: mov disp(%rbp), %reg */ - restore_reg(state, op->dest.reg); + restore_reg(cfi, op->dest.reg); } else if (op->src.reg == cfa->base && op->src.offset == regs[op->dest.reg].offset + cfa->offset) { /* mov disp(%rbp), %reg */ /* mov disp(%rsp), %reg */ - restore_reg(state, op->dest.reg); + restore_reg(cfi, op->dest.reg); } break; @@ -1780,78 +1953,78 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) case OP_DEST_PUSH: case OP_DEST_PUSHF: - state->stack_size += 8; + cfi->stack_size += 8; if (cfa->base == CFI_SP) cfa->offset += 8; if (op->src.type != OP_SRC_REG) break; - if (state->drap) { - if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { + if (cfi->drap) { + if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) { /* drap: push %drap */ cfa->base = CFI_BP_INDIRECT; - cfa->offset = -state->stack_size; + cfa->offset = -cfi->stack_size; /* save drap so we know when to restore it */ - state->drap_offset = -state->stack_size; + cfi->drap_offset = -cfi->stack_size; - } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) { + } else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) { /* drap: push %rbp */ - state->stack_size = 0; + cfi->stack_size = 0; } else if (regs[op->src.reg].base == CFI_UNDEFINED) { /* drap: push %reg */ - save_reg(state, op->src.reg, CFI_BP, -state->stack_size); + save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size); } } else { /* push %reg */ - save_reg(state, op->src.reg, CFI_CFA, -state->stack_size); + save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size); } /* detect when asm code uses rbp as a scratch register */ if (!no_fp && insn->func && op->src.reg == CFI_BP && cfa->base != CFI_BP) - state->bp_scratch = true; + cfi->bp_scratch = true; break; case OP_DEST_REG_INDIRECT: - if (state->drap) { - if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) { + if (cfi->drap) { + if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) { /* drap: mov %drap, disp(%rbp) */ cfa->base = CFI_BP_INDIRECT; cfa->offset = op->dest.offset; /* save drap offset so we know when to restore it */ - state->drap_offset = op->dest.offset; + cfi->drap_offset = op->dest.offset; } else if (regs[op->src.reg].base == CFI_UNDEFINED) { /* drap: mov reg, disp(%rbp) */ - save_reg(state, op->src.reg, CFI_BP, op->dest.offset); + save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset); } } else if (op->dest.reg == cfa->base) { /* mov reg, disp(%rbp) */ /* mov reg, disp(%rsp) */ - save_reg(state, op->src.reg, CFI_CFA, - op->dest.offset - state->cfa.offset); + save_reg(cfi, op->src.reg, CFI_CFA, + op->dest.offset - cfi->cfa.offset); } break; case OP_DEST_LEAVE: - if ((!state->drap && cfa->base != CFI_BP) || - (state->drap && cfa->base != state->drap_reg)) { + if ((!cfi->drap && cfa->base != CFI_BP) || + (cfi->drap && cfa->base != cfi->drap_reg)) { WARN_FUNC("leave instruction with modified stack frame", insn->sec, insn->offset); return -1; @@ -1859,10 +2032,10 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) /* leave (mov %rbp, %rsp; pop %rbp) */ - state->stack_size = -state->regs[CFI_BP].offset - 8; - restore_reg(state, CFI_BP); + cfi->stack_size = -cfi->regs[CFI_BP].offset - 8; + restore_reg(cfi, CFI_BP); - if (!state->drap) { + if (!cfi->drap) { cfa->base = CFI_SP; cfa->offset -= 8; } @@ -1877,7 +2050,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) } /* pop mem */ - state->stack_size -= 8; + cfi->stack_size -= 8; if (cfa->base == CFI_SP) cfa->offset -= 8; @@ -1892,41 +2065,86 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) return 0; } -static bool insn_state_match(struct instruction *insn, struct insn_state *state) +static int handle_insn_ops(struct instruction *insn, struct insn_state *state) { - struct insn_state *state1 = &insn->state, *state2 = state; + struct stack_op *op; + + list_for_each_entry(op, &insn->stack_ops, list) { + struct cfi_state old_cfi = state->cfi; + int res; + + res = update_cfi_state(insn, &state->cfi, op); + if (res) + return res; + + if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { + WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); + return -1; + } + + if (op->dest.type == OP_DEST_PUSHF) { + if (!state->uaccess_stack) { + state->uaccess_stack = 1; + } else if (state->uaccess_stack >> 31) { + WARN_FUNC("PUSHF stack exhausted", + insn->sec, insn->offset); + return 1; + } + state->uaccess_stack <<= 1; + state->uaccess_stack |= state->uaccess; + } + + if (op->src.type == OP_SRC_POPF) { + if (state->uaccess_stack) { + state->uaccess = state->uaccess_stack & 1; + state->uaccess_stack >>= 1; + if (state->uaccess_stack == 1) + state->uaccess_stack = 0; + } + } + } + + return 0; +} + +static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2) +{ + struct cfi_state *cfi1 = &insn->cfi; int i; - if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) { + if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) { + WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d", insn->sec, insn->offset, - state1->cfa.base, state1->cfa.offset, - state2->cfa.base, state2->cfa.offset); + cfi1->cfa.base, cfi1->cfa.offset, + cfi2->cfa.base, cfi2->cfa.offset); - } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) { + } else if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) { for (i = 0; i < CFI_NUM_REGS; i++) { - if (!memcmp(&state1->regs[i], &state2->regs[i], + if (!memcmp(&cfi1->regs[i], &cfi2->regs[i], sizeof(struct cfi_reg))) continue; WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d", insn->sec, insn->offset, - i, state1->regs[i].base, state1->regs[i].offset, - i, state2->regs[i].base, state2->regs[i].offset); + i, cfi1->regs[i].base, cfi1->regs[i].offset, + i, cfi2->regs[i].base, cfi2->regs[i].offset); break; } - } else if (state1->type != state2->type) { + } else if (cfi1->type != cfi2->type) { + WARN_FUNC("stack state mismatch: type1=%d type2=%d", - insn->sec, insn->offset, state1->type, state2->type); + insn->sec, insn->offset, cfi1->type, cfi2->type); + + } else if (cfi1->drap != cfi2->drap || + (cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) || + (cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) { - } else if (state1->drap != state2->drap || - (state1->drap && state1->drap_reg != state2->drap_reg) || - (state1->drap && state1->drap_offset != state2->drap_offset)) { WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)", insn->sec, insn->offset, - state1->drap, state1->drap_reg, state1->drap_offset, - state2->drap, state2->drap_reg, state2->drap_offset); + cfi1->drap, cfi1->drap_reg, cfi1->drap_offset, + cfi2->drap, cfi2->drap_reg, cfi2->drap_offset); } else return true; @@ -1952,6 +2170,13 @@ static inline const char *call_dest_name(struct instruction *insn) static int validate_call(struct instruction *insn, struct insn_state *state) { + if (state->noinstr && state->instr <= 0 && + (!insn->call_dest || !insn->call_dest->sec->noinstr)) { + WARN_FUNC("call to %s() leaves .noinstr.text section", + insn->sec, insn->offset, call_dest_name(insn)); + return 1; + } + if (state->uaccess && !func_uaccess_safe(insn->call_dest)) { WARN_FUNC("call to %s() with UACCESS enabled", insn->sec, insn->offset, call_dest_name(insn)); @@ -1969,7 +2194,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state) static int validate_sibling_call(struct instruction *insn, struct insn_state *state) { - if (has_modified_stack_frame(state)) { + if (has_modified_stack_frame(insn, state)) { WARN_FUNC("sibling call from callable instruction with modified stack frame", insn->sec, insn->offset); return 1; @@ -1980,6 +2205,12 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state) { + if (state->noinstr && state->instr > 0) { + WARN_FUNC("return with instrumentation enabled", + insn->sec, insn->offset); + return 1; + } + if (state->uaccess && !func_uaccess_safe(func)) { WARN_FUNC("return with UACCESS enabled", insn->sec, insn->offset); @@ -1998,13 +2229,13 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct return 1; } - if (func && has_modified_stack_frame(state)) { + if (func && has_modified_stack_frame(insn, state)) { WARN_FUNC("return with modified stack frame", insn->sec, insn->offset); return 1; } - if (state->bp_scratch) { + if (state->cfi.bp_scratch) { WARN_FUNC("BP used as a scratch register", insn->sec, insn->offset); return 1; @@ -2014,29 +2245,46 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct } /* + * Alternatives should not contain any ORC entries, this in turn means they + * should not contain any CFI ops, which implies all instructions should have + * the same same CFI state. + * + * It is possible to constuct alternatives that have unreachable holes that go + * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED + * states which then results in ORC entries, which we just said we didn't want. + * + * Avoid them by copying the CFI entry of the first instruction into the whole + * alternative. + */ +static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *first_insn = insn; + int alt_group = insn->alt_group; + + sec_for_each_insn_continue(file, insn) { + if (insn->alt_group != alt_group) + break; + insn->cfi = first_insn->cfi; + } +} + +/* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at * each instruction and validate all the rules described in * tools/objtool/Documentation/stack-validation.txt. */ static int validate_branch(struct objtool_file *file, struct symbol *func, - struct instruction *first, struct insn_state state) + struct instruction *insn, struct insn_state state) { struct alternative *alt; - struct instruction *insn, *next_insn; + struct instruction *next_insn; struct section *sec; u8 visited; int ret; - insn = first; sec = insn->sec; - if (insn->alt_group && list_empty(&insn->alts)) { - WARN_FUNC("don't know how to handle branch to middle of alternative instruction group", - sec, insn->offset); - return 1; - } - while (1) { next_insn = next_insn_same_sec(file, insn); @@ -2054,59 +2302,24 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, visited = 1 << state.uaccess; if (insn->visited) { - if (!insn->hint && !insn_state_match(insn, &state)) + if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) return 1; if (insn->visited & visited) return 0; } - if (insn->hint) { - if (insn->restore) { - struct instruction *save_insn, *i; - - i = insn; - save_insn = NULL; - sym_for_each_insn_continue_reverse(file, func, i) { - if (i->save) { - save_insn = i; - break; - } - } - - if (!save_insn) { - WARN_FUNC("no corresponding CFI save for CFI restore", - sec, insn->offset); - return 1; - } - - if (!save_insn->visited) { - /* - * Oops, no state to copy yet. - * Hopefully we can reach this - * instruction from another branch - * after the save insn has been - * visited. - */ - if (insn == first) - return 0; - - WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", - sec, insn->offset); - return 1; - } - - insn->state = save_insn->state; - } - - state = insn->state; + if (state.noinstr) + state.instr += insn->instr; - } else - insn->state = state; + if (insn->hint) + state.cfi = insn->cfi; + else + insn->cfi = state.cfi; insn->visited |= visited; - if (!insn->ignore_alts) { + if (!insn->ignore_alts && !list_empty(&insn->alts)) { bool skip_orig = false; list_for_each_entry(alt, &insn->alts, list) { @@ -2121,10 +2334,16 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } + if (insn->alt_group) + fill_alternative_cfi(file, insn); + if (skip_orig) return 0; } + if (handle_insn_ops(insn, &state)) + return 1; + switch (insn->type) { case INSN_RETURN: @@ -2191,32 +2410,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } return 0; - case INSN_STACK: - if (update_insn_state(insn, &state)) - return 1; - - if (insn->stack_op.dest.type == OP_DEST_PUSHF) { - if (!state.uaccess_stack) { - state.uaccess_stack = 1; - } else if (state.uaccess_stack >> 31) { - WARN_FUNC("PUSHF stack exhausted", sec, insn->offset); - return 1; - } - state.uaccess_stack <<= 1; - state.uaccess_stack |= state.uaccess; - } - - if (insn->stack_op.src.type == OP_SRC_POPF) { - if (state.uaccess_stack) { - state.uaccess = state.uaccess_stack & 1; - state.uaccess_stack >>= 1; - if (state.uaccess_stack == 1) - state.uaccess_stack = 0; - } - } - - break; - case INSN_STAC: if (state.uaccess) { WARN_FUNC("recursive UACCESS enable", sec, insn->offset); @@ -2262,7 +2455,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; if (!next_insn) { - if (state.cfa.base == CFI_UNDEFINED) + if (state.cfi.cfa.base == CFI_UNDEFINED) return 0; WARN("%s: unexpected end of section", sec->name); return 1; @@ -2274,24 +2467,34 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 0; } -static int validate_unwind_hints(struct objtool_file *file) +static int validate_unwind_hints(struct objtool_file *file, struct section *sec) { struct instruction *insn; - int ret, warnings = 0; struct insn_state state; + int ret, warnings = 0; if (!file->hints) return 0; - clear_insn_state(&state); + init_insn_state(&state, sec); - for_each_insn(file, insn) { + if (sec) { + insn = find_insn(file, sec, 0); + if (!insn) + return 0; + } else { + insn = list_first_entry(&file->insn_list, typeof(*insn), list); + } + + while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { if (insn->hint && !insn->visited) { ret = validate_branch(file, insn->func, insn, state); if (ret && backtrace) BT_FUNC("<=== (hint)", insn); warnings += ret; } + + insn = list_next_entry(insn, list); } return warnings; @@ -2406,43 +2609,69 @@ static bool ignore_unreachable_insn(struct instruction *insn) return false; } -static int validate_section(struct objtool_file *file, struct section *sec) +static int validate_symbol(struct objtool_file *file, struct section *sec, + struct symbol *sym, struct insn_state *state) { - struct symbol *func; struct instruction *insn; - struct insn_state state; - int ret, warnings = 0; + int ret; + + if (!sym->len) { + WARN("%s() is missing an ELF size annotation", sym->name); + return 1; + } + + if (sym->pfunc != sym || sym->alias != sym) + return 0; - clear_insn_state(&state); + insn = find_insn(file, sec, sym->offset); + if (!insn || insn->ignore || insn->visited) + return 0; + + state->uaccess = sym->uaccess_safe; - state.cfa = initial_func_cfi.cfa; - memcpy(&state.regs, &initial_func_cfi.regs, - CFI_NUM_REGS * sizeof(struct cfi_reg)); - state.stack_size = initial_func_cfi.cfa.offset; + ret = validate_branch(file, insn->func, insn, *state); + if (ret && backtrace) + BT_FUNC("<=== (sym)", insn); + return ret; +} + +static int validate_section(struct objtool_file *file, struct section *sec) +{ + struct insn_state state; + struct symbol *func; + int warnings = 0; list_for_each_entry(func, &sec->symbol_list, list) { if (func->type != STT_FUNC) continue; - if (!func->len) { - WARN("%s() is missing an ELF size annotation", - func->name); - warnings++; - } + init_insn_state(&state, sec); + state.cfi.cfa = initial_func_cfi.cfa; + memcpy(&state.cfi.regs, &initial_func_cfi.regs, + CFI_NUM_REGS * sizeof(struct cfi_reg)); + state.cfi.stack_size = initial_func_cfi.cfa.offset; - if (func->pfunc != func || func->alias != func) - continue; + warnings += validate_symbol(file, sec, func, &state); + } - insn = find_insn(file, sec, func->offset); - if (!insn || insn->ignore || insn->visited) - continue; + return warnings; +} - state.uaccess = func->uaccess_safe; +static int validate_vmlinux_functions(struct objtool_file *file) +{ + struct section *sec; + int warnings = 0; - ret = validate_branch(file, func, insn, state); - if (ret && backtrace) - BT_FUNC("<=== (func)", insn); - warnings += ret; + sec = find_section_by_name(file->elf, ".noinstr.text"); + if (sec) { + warnings += validate_section(file, sec); + warnings += validate_unwind_hints(file, sec); + } + + sec = find_section_by_name(file->elf, ".entry.text"); + if (sec) { + warnings += validate_section(file, sec); + warnings += validate_unwind_hints(file, sec); } return warnings; @@ -2453,8 +2682,12 @@ static int validate_functions(struct objtool_file *file) struct section *sec; int warnings = 0; - for_each_sec(file, sec) + for_each_sec(file, sec) { + if (!(sec->sh.sh_flags & SHF_EXECINSTR)) + continue; + warnings += validate_section(file, sec); + } return warnings; } @@ -2485,7 +2718,7 @@ int check(const char *_objname, bool orc) objname = _objname; - file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY); + file.elf = elf_open_read(objname, orc ? O_RDWR : O_RDONLY); if (!file.elf) return 1; @@ -2505,6 +2738,15 @@ int check(const char *_objname, bool orc) if (list_empty(&file.insn_list)) goto out; + if (vmlinux && !validate_dup) { + ret = validate_vmlinux_functions(&file); + if (ret < 0) + goto out; + + warnings += ret; + goto out; + } + if (retpoline) { ret = validate_retpoline(&file); if (ret < 0) @@ -2517,7 +2759,7 @@ int check(const char *_objname, bool orc) goto out; warnings += ret; - ret = validate_unwind_hints(&file); + ret = validate_unwind_hints(&file, NULL); if (ret < 0) goto out; warnings += ret; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index f0ce8ffe7135..906b5210f7ca 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -7,22 +7,16 @@ #define _CHECK_H #include <stdbool.h> -#include "elf.h" #include "cfi.h" #include "arch.h" -#include "orc.h" -#include <linux/hashtable.h> struct insn_state { - struct cfi_reg cfa; - struct cfi_reg regs[CFI_NUM_REGS]; - int stack_size; - unsigned char type; - bool bp_scratch; - bool drap, end, uaccess, df; + struct cfi_state cfi; unsigned int uaccess_stack; - int drap_reg, drap_offset; - struct cfi_reg vals[CFI_NUM_REGS]; + bool uaccess; + bool df; + bool noinstr; + s8 instr; }; struct instruction { @@ -33,29 +27,24 @@ struct instruction { unsigned int len; enum insn_type type; unsigned long immediate; - bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts; + bool dead_end, ignore, ignore_alts; + bool hint; bool retpoline_safe; + s8 instr; u8 visited; + u8 ret_offset; + int alt_group; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; struct rela *jump_table; struct list_head alts; struct symbol *func; - struct stack_op stack_op; - struct insn_state state; + struct list_head stack_ops; + struct cfi_state cfi; struct orc_entry orc; }; -struct objtool_file { - struct elf *elf; - struct list_head insn_list; - DECLARE_HASHTABLE(insn_hash, 20); - bool ignore_unreachables, c_file, hints, rodata; -}; - -int check(const char *objname, bool orc); - struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 09ddc8f1def3..84225679f96d 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -27,6 +27,22 @@ static inline u32 str_hash(const char *str) return jhash(str, strlen(str), 0); } +static inline int elf_hash_bits(void) +{ + return vmlinux ? ELF_HASH_BITS : 16; +} + +#define elf_hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())]) + +static void elf_hash_init(struct hlist_head *table) +{ + __hash_init(table, 1U << elf_hash_bits()); +} + +#define elf_hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member) + static void rb_add(struct rb_root *tree, struct rb_node *node, int (*cmp)(struct rb_node *, const struct rb_node *)) { @@ -45,7 +61,7 @@ static void rb_add(struct rb_root *tree, struct rb_node *node, rb_insert_color(node, tree); } -static struct rb_node *rb_find_first(struct rb_root *tree, const void *key, +static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key, int (*cmp)(const void *key, const struct rb_node *)) { struct rb_node *node = tree->rb_node; @@ -105,17 +121,17 @@ static int symbol_by_offset(const void *key, const struct rb_node *node) if (*o < s->offset) return -1; - if (*o > s->offset + s->len) + if (*o >= s->offset + s->len) return 1; return 0; } -struct section *find_section_by_name(struct elf *elf, const char *name) +struct section *find_section_by_name(const struct elf *elf, const char *name) { struct section *sec; - hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name)) + elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name)) if (!strcmp(sec->name, name)) return sec; @@ -127,7 +143,7 @@ static struct section *find_section_by_index(struct elf *elf, { struct section *sec; - hash_for_each_possible(elf->section_hash, sec, hash, idx) + elf_hash_for_each_possible(elf->section_hash, sec, hash, idx) if (sec->idx == idx) return sec; @@ -138,7 +154,7 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) { struct symbol *sym; - hash_for_each_possible(elf->symbol_hash, sym, hash, idx) + elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx) if (sym->idx == idx) return sym; @@ -173,7 +189,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset) return NULL; } -struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset) { struct rb_node *node; @@ -201,18 +217,18 @@ struct symbol *find_func_containing(struct section *sec, unsigned long offset) return NULL; } -struct symbol *find_symbol_by_name(struct elf *elf, const char *name) +struct symbol *find_symbol_by_name(const struct elf *elf, const char *name) { struct symbol *sym; - hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name)) + elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name)) if (!strcmp(sym->name, name)) return sym; return NULL; } -struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, +struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len) { struct rela *rela, *r = NULL; @@ -224,7 +240,7 @@ struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, sec = sec->rela; for_offset_range(o, offset, offset + len) { - hash_for_each_possible(elf->rela_hash, rela, hash, + elf_hash_for_each_possible(elf->rela_hash, rela, hash, sec_offset_hash(sec, o)) { if (rela->sec != sec) continue; @@ -241,7 +257,7 @@ struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, return NULL; } -struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset) +struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset) { return find_rela_by_dest_range(elf, sec, offset, 1); } @@ -309,8 +325,8 @@ static int read_sections(struct elf *elf) sec->len = sec->sh.sh_size; list_add_tail(&sec->list, &elf->sections); - hash_add(elf->section_hash, &sec->hash, sec->idx); - hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); + elf_hash_add(elf->section_hash, &sec->hash, sec->idx); + elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); } if (stats) @@ -327,12 +343,14 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { - struct section *symtab, *sec; + struct section *symtab, *symtab_shndx, *sec; struct symbol *sym, *pfunc; struct list_head *entry; struct rb_node *pnode; int symbols_nr, i; char *coldstr; + Elf_Data *shndx_data = NULL; + Elf32_Word shndx; symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { @@ -340,6 +358,10 @@ static int read_symbols(struct elf *elf) return -1; } + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + if (symtab_shndx) + shndx_data = symtab_shndx->data; + symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize; for (i = 0; i < symbols_nr; i++) { @@ -353,8 +375,9 @@ static int read_symbols(struct elf *elf) sym->idx = i; - if (!gelf_getsym(symtab->data, i, &sym->sym)) { - WARN_ELF("gelf_getsym"); + if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym, + &shndx)) { + WARN_ELF("gelf_getsymshndx"); goto err; } @@ -368,10 +391,13 @@ static int read_symbols(struct elf *elf) sym->type = GELF_ST_TYPE(sym->sym.st_info); sym->bind = GELF_ST_BIND(sym->sym.st_info); - if (sym->sym.st_shndx > SHN_UNDEF && - sym->sym.st_shndx < SHN_LORESERVE) { - sym->sec = find_section_by_index(elf, - sym->sym.st_shndx); + if ((sym->sym.st_shndx > SHN_UNDEF && + sym->sym.st_shndx < SHN_LORESERVE) || + (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) { + if (sym->sym.st_shndx != SHN_XINDEX) + shndx = sym->sym.st_shndx; + + sym->sec = find_section_by_index(elf, shndx); if (!sym->sec) { WARN("couldn't find section for symbol %s", sym->name); @@ -394,8 +420,8 @@ static int read_symbols(struct elf *elf) else entry = &sym->sec->symbol_list; list_add(&sym->list, entry); - hash_add(elf->symbol_hash, &sym->hash, sym->idx); - hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); + elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); } if (stats) @@ -456,6 +482,14 @@ err: return -1; } +void elf_add_rela(struct elf *elf, struct rela *rela) +{ + struct section *sec = rela->sec; + + list_add_tail(&rela->list, &sec->rela_list); + elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela)); +} + static int read_relas(struct elf *elf) { struct section *sec; @@ -503,8 +537,7 @@ static int read_relas(struct elf *elf) return -1; } - list_add_tail(&rela->list, &sec->rela_list); - hash_add(elf->rela_hash, &rela->hash, rela_hash(rela)); + elf_add_rela(elf, rela); nr_rela++; } max_rela = max(max_rela, nr_rela); @@ -519,7 +552,7 @@ static int read_relas(struct elf *elf) return 0; } -struct elf *elf_read(const char *name, int flags) +struct elf *elf_open_read(const char *name, int flags) { struct elf *elf; Elf_Cmd cmd; @@ -531,15 +564,16 @@ struct elf *elf_read(const char *name, int flags) perror("malloc"); return NULL; } - memset(elf, 0, sizeof(*elf)); + memset(elf, 0, offsetof(struct elf, sections)); - hash_init(elf->symbol_hash); - hash_init(elf->symbol_name_hash); - hash_init(elf->section_hash); - hash_init(elf->section_name_hash); - hash_init(elf->rela_hash); INIT_LIST_HEAD(&elf->sections); + elf_hash_init(elf->symbol_hash); + elf_hash_init(elf->symbol_name_hash); + elf_hash_init(elf->section_hash); + elf_hash_init(elf->section_name_hash); + elf_hash_init(elf->rela_hash); + elf->fd = open(name, flags); if (elf->fd == -1) { fprintf(stderr, "objtool: Can't open '%s': %s\n", @@ -676,8 +710,8 @@ struct section *elf_create_section(struct elf *elf, const char *name, shstrtab->changed = true; list_add_tail(&sec->list, &elf->sections); - hash_add(elf->section_hash, &sec->hash, sec->idx); - hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); + elf_hash_add(elf->section_hash, &sec->hash, sec->idx); + elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); return sec; } @@ -745,7 +779,7 @@ int elf_rebuild_rela_section(struct section *sec) return 0; } -int elf_write(struct elf *elf) +int elf_write(const struct elf *elf) { struct section *sec; Elf_Scn *s; diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index ebbb10c61e24..f4fe1d6ea392 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -39,7 +39,7 @@ struct section { char *name; int idx; unsigned int len; - bool changed, text, rodata; + bool changed, text, rodata, noinstr; }; struct symbol { @@ -70,26 +70,29 @@ struct rela { bool jump_table_start; }; +#define ELF_HASH_BITS 20 + struct elf { Elf *elf; GElf_Ehdr ehdr; int fd; char *name; struct list_head sections; - DECLARE_HASHTABLE(symbol_hash, 20); - DECLARE_HASHTABLE(symbol_name_hash, 20); - DECLARE_HASHTABLE(section_hash, 16); - DECLARE_HASHTABLE(section_name_hash, 16); - DECLARE_HASHTABLE(rela_hash, 20); + DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(rela_hash, ELF_HASH_BITS); }; #define OFFSET_STRIDE_BITS 4 #define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS) #define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1)) -#define for_offset_range(_offset, _start, _end) \ - for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ - _offset <= ((_end) & OFFSET_STRIDE_MASK); \ +#define for_offset_range(_offset, _start, _end) \ + for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ + _offset >= ((_start) & OFFSET_STRIDE_MASK) && \ + _offset <= ((_end) & OFFSET_STRIDE_MASK); \ _offset += OFFSET_STRIDE) static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) @@ -99,7 +102,7 @@ static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) offset &= OFFSET_STRIDE_MASK; ol = offset; - oh = offset >> 32; + oh = (offset >> 16) >> 16; __jhash_mix(ol, oh, idx); @@ -111,22 +114,23 @@ static inline u32 rela_hash(struct rela *rela) return sec_offset_hash(rela->sec, rela->offset); } -struct elf *elf_read(const char *name, int flags); -struct section *find_section_by_name(struct elf *elf, const char *name); +struct elf *elf_open_read(const char *name, int flags); +struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr); +struct section *elf_create_rela_section(struct elf *elf, struct section *base); +void elf_add_rela(struct elf *elf, struct rela *rela); +int elf_write(const struct elf *elf); +void elf_close(struct elf *elf); + +struct section *find_section_by_name(const struct elf *elf, const char *name); struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); -struct symbol *find_symbol_by_name(struct elf *elf, const char *name); -struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); -struct rela *find_rela_by_dest(struct elf *elf, struct section *sec, unsigned long offset); -struct rela *find_rela_by_dest_range(struct elf *elf, struct section *sec, +struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); +struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); +struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); +struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); struct symbol *find_func_containing(struct section *sec, unsigned long offset); -struct section *elf_create_section(struct elf *elf, const char *name, size_t - entsize, int nr); -struct section *elf_create_rela_section(struct elf *elf, struct section *base); int elf_rebuild_rela_section(struct section *sec); -int elf_write(struct elf *elf); -void elf_close(struct elf *elf); #define for_each_sec(file, sec) \ list_for_each_entry(sec, &file->elf->sections, list) diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 0b3528f05053..58fdda510653 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -58,7 +58,9 @@ static void cmd_usage(void) printf("\n"); - exit(129); + if (!help) + exit(129); + exit(0); } static void handle_options(int *argc, const char ***argv) diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h new file mode 100644 index 000000000000..528028a66816 --- /dev/null +++ b/tools/objtool/objtool.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com> + */ + +#ifndef _OBJTOOL_H +#define _OBJTOOL_H + +#include <stdbool.h> +#include <linux/list.h> +#include <linux/hashtable.h> + +#include "elf.h" + +struct objtool_file { + struct elf *elf; + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); + bool ignore_unreachables, c_file, hints, rodata; +}; + +int check(const char *objname, bool orc); +int orc_dump(const char *objname); +int create_orc(struct objtool_file *file); +int create_orc_sections(struct objtool_file *file); + +#endif /* _OBJTOOL_H */ diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h deleted file mode 100644 index ee2832221e62..000000000000 --- a/tools/objtool/orc.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com> - */ - -#ifndef _ORC_H -#define _ORC_H - -#include <asm/orc_types.h> - -struct objtool_file; - -int create_orc(struct objtool_file *file); -int create_orc_sections(struct objtool_file *file); - -int orc_dump(const char *objname); - -#endif /* _ORC_H */ diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index ba4cbb1cdd63..fca46e006fc2 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -4,7 +4,8 @@ */ #include <unistd.h> -#include "orc.h" +#include <asm/orc_types.h> +#include "objtool.h" #include "warn.h" static const char *reg_name(unsigned int reg) diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 4c0dabd28000..c9549988121a 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -6,7 +6,6 @@ #include <stdlib.h> #include <string.h> -#include "orc.h" #include "check.h" #include "warn.h" @@ -16,10 +15,10 @@ int create_orc(struct objtool_file *file) for_each_insn(file, insn) { struct orc_entry *orc = &insn->orc; - struct cfi_reg *cfa = &insn->state.cfa; - struct cfi_reg *bp = &insn->state.regs[CFI_BP]; + struct cfi_reg *cfa = &insn->cfi.cfa; + struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; - orc->end = insn->state.end; + orc->end = insn->cfi.end; if (cfa->base == CFI_UNDEFINED) { orc->sp_reg = ORC_REG_UNDEFINED; @@ -75,7 +74,7 @@ int create_orc(struct objtool_file *file) orc->sp_offset = cfa->offset; orc->bp_offset = bp->offset; - orc->type = insn->state.type; + orc->type = insn->cfi.type; } return 0; @@ -130,8 +129,7 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti rela->offset = idx * sizeof(int); rela->sec = ip_relasec; - list_add_tail(&rela->list, &ip_relasec->rela_list); - hash_add(elf->rela_hash, &rela->hash, rela_hash(rela)); + elf_add_rela(elf, rela); return 0; } diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c new file mode 100644 index 000000000000..942ea5e8ac36 --- /dev/null +++ b/tools/objtool/weak.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com> + * Weak definitions necessary to compile objtool without + * some subcommands (e.g. check, orc). + */ + +#include <stdbool.h> +#include <errno.h> +#include "objtool.h" + +#define __weak __attribute__((weak)) + +#define UNSUPPORTED(name) \ +({ \ + fprintf(stderr, "error: objtool: " name " not implemented\n"); \ + return ENOSYS; \ +}) + +const char __weak *objname; + +int __weak check(const char *_objname, bool orc) +{ + UNSUPPORTED("check subcommand"); +} + +int __weak orc_dump(const char *_objname) +{ + UNSUPPORTED("orc"); +} + +int __weak create_orc(struct objtool_file *file) +{ + UNSUPPORTED("orc"); +} + +int __weak create_orc_sections(struct objtool_file *file) +{ + UNSUPPORTED("orc"); +} diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 31824d5269cc..6e54979c2124 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -48,7 +48,7 @@ man5dir=$(mandir)/man5 man7dir=$(mandir)/man7 ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf +ASCIIDOC_EXTRA += --unsafe -f asciidoc.conf ASCIIDOC_HTML = xhtml11 MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = @@ -59,7 +59,7 @@ HTML_REF = origin/html ifdef USE_ASCIIDOCTOR ASCIIDOC = asciidoctor -ASCIIDOC_EXTRA = -a compat-mode +ASCIIDOC_EXTRA += -a compat-mode ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual" ASCIIDOC_HTML = xhtml5 diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 82ff7dad40c2..271484754fee 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -10,7 +10,9 @@ e synthesize error events d create a debug log g synthesize a call chain (use with i or x) + G synthesize a call chain on existing event records l synthesize last branch entries (use with i or x) + L synthesize last branch entries on existing event records s skip initial number of events The default is all events i.e. the same as --itrace=ibxwpe, @@ -31,6 +33,10 @@ Also the number of last branch entries (default 64, max. 1024) for instructions or transactions events can be specified. + Similar to options g and l, size may also be specified for options G and L. + On x86, note that G and L work poorly when data has been recorded with + large PEBS. Refer linkperf:perf-intel-pt[1] man page for details. + It is also possible to skip events generated (instructions, branches, transactions, ptwrite, power) at the beginning. This is useful to ignore initialization code. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 0921a3c67381..bad16512c48d 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -61,6 +61,9 @@ SUBSYSTEM 'epoll':: Eventpoll (epoll) stressing benchmarks. +'internals':: + Benchmark internal perf functionality. + 'all':: All benchmark subsystems. @@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls. *ctl*:: Suite for evaluating multiple epoll_ctl calls. +SUITES FOR 'internals' +~~~~~~~~~~~~~~~~~~~~~~ +*synthesize*:: +Suite for evaluating perf's event synthesis performance. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index e6150f21267d..2133eb320cb0 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -111,6 +111,17 @@ REPORT OPTIONS --display:: Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The perf.data file must have been obtained using + perf c2c record --call-graph lbr. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + C2C RECORD ---------- The perf c2c record command setup options related to HITM cacheline analysis diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 456fdcbf26ac..eb8b7d42591a 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -69,22 +69,22 @@ And profiled with 'perf report' e.g. To also trace kernel space presents a problem, namely kernel self-modifying code. A fairly good kernel image is available in /proc/kcore but to get an accurate image a copy of /proc/kcore needs to be made under the same conditions -as the data capture. A script perf-with-kcore can do that, but beware that the -script makes use of 'sudo' to copy /proc/kcore. If you have perf installed -locally from the source tree you can do: +as the data capture. 'perf record' can make a copy of /proc/kcore if the option +--kcore is used, but access to /proc/kcore is restricted e.g. - ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + sudo perf record -o pt_ls --kcore -e intel_pt// -- ls -which will create a directory named 'pt_ls' and put the perf.data file and -copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use -'perf report' becomes: +which will create a directory named 'pt_ls' and put the perf.data file (named +simply 'data') and copies of /proc/kcore, /proc/kallsyms and /proc/modules into +it. The other tools understand the directory format, so to use 'perf report' +becomes: - ~/libexec/perf-core/perf-with-kcore report pt_ls + sudo perf report -i pt_ls Because samples are synthesized after-the-fact, the sampling period can be selected for reporting. e.g. sample every microsecond - ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + sudo perf report pt_ls --itrace=i1usge See the sections below for more information about the --itrace option. @@ -821,7 +821,9 @@ The letters are: e synthesize tracing error events d create a debug log g synthesize a call chain (use with i or x) + G synthesize a call chain on existing event records l synthesize last branch entries (use with i or x) + L synthesize last branch entries on existing event records s skip initial number of events "Instructions" events look like they were recorded by "perf record -e @@ -912,6 +914,39 @@ transactions events can be specified. e.g. Note that last branch entries are cleared for each sample, so there is no overlap from one sample to the next. +The G and L options are designed in particular for sample mode, and work much +like g and l but add call chain and branch stack to the other selected events +instead of synthesized events. For example, to record branch-misses events for +'ls' and then add a call chain derived from the Intel PT trace: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' -- ls + perf report --itrace=Ge + +Although in fact G is a default for perf report, so that is the same as just: + + perf report + +One caveat with the G and L options is that they work poorly with "Large PEBS". +Large PEBS means PEBS records will be accumulated by hardware and the written +into the event buffer in one go. That reduces interrupts, but can give very +late timestamps. Because the Intel PT trace is synchronized by timestamps, +the PEBS events do not match the trace. Currently, Large PEBS is used only in +certain circumstances: + - hardware supports it + - PEBS is used + - event period is specified, instead of frequency + - the sample type is limited to the following flags: + PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | + PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | + PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_PERIOD (and sometimes) | PERF_SAMPLE_TIME +Because Intel PT sample mode uses a different sample type to the list above, +Large PEBS is not used with Intel PT sample mode. To avoid Large PEBS in other +cases, avoid specifying the event period i.e. avoid the 'perf record' -c option, +--count option, or 'period' config term. + To disable trace decoding entirely, use the option --no-itrace. It is also possible to skip events generated (instructions, branches, transactions) diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 6345db33c533..376a50b3452d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -115,6 +115,11 @@ raw encoding of 0x1A8 can be used: perf stat -e r1a8 -a sleep 1 perf record -e r1a8 ... +It's also possible to use pmu syntax: + + perf record -e r1a8 -a sleep 1 + perf record -e cpu/r1a8/ ... + You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. @@ -258,6 +263,9 @@ Normally all events in an event group sample, but with :S only the first event (the leader) samples, and it only reads the values of the other events in the group. +However, in the case AUX area events (e.g. Intel PT or CoreSight), the AUX +area event must be the leader, so then the second event samples, not the first. + OPTIONS ------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b3f3b3f1c161..561ef55743e2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -556,6 +556,19 @@ overhead. You can still switch them on with: --switch-output --no-no-buildid --no-no-buildid-cache +--switch-output-event:: +Events that will cause the switch of the perf.data file, auto-selecting +--switch-output=signal, the results are similar as internally the side band +thread will also send a SIGUSR2 to the main one. + +Uses the same syntax as --event, it will just not be recorded, serving only to +switch the perf.data file as soon as the --switch-output event is processed by +a separate sideband thread. + +This sideband thread is also used to other purposes, like processing the +PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF +information, etc. + --switch-max-files=N:: When rotating perf.data with --switch-output, only keep N files. @@ -596,6 +609,10 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file Limit the sample data max size, <size> is expected to be a number with appended unit character - B/K/M/G +--num-thread-synthesize:: + The number of threads to run when synthesizing events for existing processes. + By default, the number of threads equals 1. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f569b9ea4002..d068103690cc 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -488,6 +488,17 @@ include::itrace.txt[] This option extends the perf report to show reference callgraphs, which collected by reference event, in no callgraph event. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The perf.data file must have been obtained using + perf record --call-graph lbr. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + --socket-filter:: Only report the samples on the processor socket that match with this filter diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 963487e82edc..372dfd110e6d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -440,6 +440,17 @@ include::itrace.txt[] --show-on-off-events:: Show the --switch-on/off events too. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The perf.data file must have been obtained using + perf record --call-graph lbr. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4d56586b2fb9..3fb5028aef08 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -176,6 +176,8 @@ Print count deltas every N milliseconds (minimum: 1ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +If the metric exists, it is calculated by the counts generated in this interval and the metric is printed after #. + --interval-count times:: Print count deltas for fixed number of times. This option should be used together with "-I" option. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 487737a725e9..20227dabc208 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -319,6 +319,15 @@ Default is to monitor all CPUS. go straight to the histogram browser, just like 'perf top' with no events explicitely specified does. +--stitch-lbr:: + Show callgraph with stitched LBRs, which may have more complete + callgraph. The option must be used with --call-graph lbr recording. + Disabled by default. In common cases with call stack overflows, + it can recreate better call stacks than the default lbr call stack + output. But this approach is not full proof. There can be cases + where it creates incorrect call stacks from incorrect matches. + The known limitations include exception handing such as + setjmp/longjmp will have calls/returns not match. INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b0152e1095c5..b6472e463284 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -373,6 +373,22 @@ struct { Indicates that trace contains records of PERF_RECORD_COMPRESSED type that have perf_events records in compressed form. + HEADER_CPU_PMU_CAPS = 28, + + A list of cpu PMU capabilities. The format of data is as below. + +struct { + u32 nr_cpu_pmu_caps; + { + char name[]; + char value[]; + } [nr_cpu_pmu_caps] +}; + + +Example: + cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index d15a311408f1..94a495594e99 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -188,7 +188,7 @@ AWK = awk # non-config cases config := 1 -NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf +NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) @@ -832,7 +832,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html # 'make doc' should call 'make -C Documentation all' $(DOC_TARGETS): - $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA) TAG_FOLDERS= . ../lib ../include TAG_FILES= ../../include/uapi/linux/perf_event.h @@ -959,7 +959,7 @@ install-python_ext: # 'make install-doc' should call 'make -C Documentation install' $(INSTALL_DOC_TARGETS): - $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) + $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA) ### Cleaning rules diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 941f814820b8..97aa02c4491d 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -23,6 +23,7 @@ #include "../../util/event.h" #include "../../util/evlist.h" #include "../../util/evsel.h" +#include "../../util/perf_api_probe.h" #include "../../util/evsel_config.h" #include "../../util/pmu.h" #include "../../util/cs-etm.h" @@ -232,7 +233,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, ret = perf_pmu__scan_file(pmu, path, "%x", &hash); if (ret != 1) { pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n", - sink, perf_evsel__name(evsel), errno, + sink, evsel__name(evsel), errno, str_error_r(errno, msg, sizeof(msg))); return ret; } @@ -401,7 +402,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, * when a context switch happened. */ if (!perf_cpu_map__empty(cpus)) { - perf_evsel__set_sample_bit(cs_etm_evsel, CPU); + evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, ETM_OPT_CTXTID | ETM_OPT_TS); @@ -425,7 +426,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, /* In per-cpu case, always need the time of mmap events etc */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); } out: diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 27653be24447..e3593063b3d1 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -120,9 +120,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, */ perf_evlist__to_front(evlist, arm_spe_evsel); - perf_evsel__set_sample_bit(arm_spe_evsel, CPU); - perf_evsel__set_sample_bit(arm_spe_evsel, TIME); - perf_evsel__set_sample_bit(arm_spe_evsel, TID); + evsel__set_sample_bit(arm_spe_evsel, CPU); + evsel__set_sample_bit(arm_spe_evsel, TIME); + evsel__set_sample_bit(arm_spe_evsel, TID); /* Add dummy event to keep tracking */ err = parse_events(evlist, "dummy:u", NULL); @@ -134,9 +134,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - perf_evsel__set_sample_bit(tracking_evsel, TIME); - perf_evsel__set_sample_bit(tracking_evsel, CPU); - perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, CPU); + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); return 0; } diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 3b4cdfc5efd6..d4870074f14c 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -7,6 +7,8 @@ #include <string.h> #include <linux/stringify.h> #include "header.h" +#include "metricgroup.h" +#include <api/fs/fs.h> #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ @@ -44,3 +46,9 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } + +int arch_get_runtimeparam(void) +{ + int count; + return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count; +} diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 16807269317c..eed9e5a42935 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -39,7 +39,7 @@ static void hcall_event_get_key(struct evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, "req"); + key->key = evsel__intval(evsel, sample, "req"); } static const char *get_hcall_exit_reason(u64 exit_code) diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index 0fd4e9f49ed0..34da89ced29a 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -30,7 +30,7 @@ static void event_icpt_insn_get_key(struct evsel *evsel, { unsigned long insn; - insn = perf_evsel__intval(evsel, sample, "instruction"); + insn = evsel__intval(evsel, sample, "instruction"); key->key = icpt_insn_decoder(insn); key->exit_reasons = sie_icpt_insn_codes; } @@ -39,7 +39,7 @@ static void event_sigp_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "order_code"); + key->key = evsel__intval(evsel, sample, "order_code"); key->exit_reasons = sie_sigp_order_codes; } @@ -47,7 +47,7 @@ static void event_diag_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "code"); + key->key = evsel__intval(evsel, sample, "code"); key->exit_reasons = sie_diagnose_codes; } @@ -55,7 +55,7 @@ static void event_icpt_prog_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "code"); + key->key = evsel__intval(evsel, sample, "code"); key->exit_reasons = sie_icpt_prog_codes; } diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 909ead08a6f6..026d32ed078e 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -130,13 +130,11 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { - CHECK__(perf_evsel__parse_sample(evsel, event, - &sample)); + CHECK__(evsel__parse_sample(evsel, event, &sample)); comm1_time = sample.time; } if (strcmp(event->comm.comm, comm2) == 0) { - CHECK__(perf_evsel__parse_sample(evsel, event, - &sample)); + CHECK__(evsel__parse_sample(evsel, event, &sample)); comm2_time = sample.time; } next_event: diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 09f93800bffd..0dc09b5809c1 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr, * AUX event. */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(intel_bts_evsel, CPU); + evsel__set_sample_bit(intel_bts_evsel, CPU); } /* Add dummy event to keep tracking */ diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 1643aed8c4c8..3f7c20cc7b79 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -25,6 +25,7 @@ #include "../../../util/pmu.h" #include "../../../util/debug.h" #include "../../../util/auxtrace.h" +#include "../../../util/perf_api_probe.h" #include "../../../util/record.h" #include "../../../util/target.h" #include "../../../util/tsc.h" @@ -420,8 +421,8 @@ static int intel_pt_track_switches(struct evlist *evlist) evsel = evlist__last(evlist); - perf_evsel__set_sample_bit(evsel, CPU); - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); evsel->core.system_wide = true; evsel->no_aux_samples = true; @@ -801,10 +802,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, switch_evsel->no_aux_samples = true; switch_evsel->immediate = true; - perf_evsel__set_sample_bit(switch_evsel, TID); - perf_evsel__set_sample_bit(switch_evsel, TIME); - perf_evsel__set_sample_bit(switch_evsel, CPU); - perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); + evsel__set_sample_bit(switch_evsel, TID); + evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); opts->record_switch_events = false; ptr->have_sched_switch = 3; @@ -838,7 +839,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, * AUX event. */ if (!perf_cpu_map__empty(cpus)) - perf_evsel__set_sample_bit(intel_pt_evsel, CPU); + evsel__set_sample_bit(intel_pt_evsel, CPU); } /* Add dummy event to keep tracking */ @@ -862,11 +863,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, /* In per-cpu case, always need the time of mmap events etc */ if (!perf_cpu_map__empty(cpus)) { - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); /* And the CPU for switch events */ - perf_evsel__set_sample_bit(tracking_evsel, CPU); + evsel__set_sample_bit(tracking_evsel, CPU); } - perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); } /* diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index c0775c39227f..072920475b65 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -31,8 +31,8 @@ const char *kvm_exit_trace = "kvm:kvm_exit"; static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "gpa"); - key->info = perf_evsel__intval(evsel, sample, "type"); + key->key = evsel__intval(evsel, sample, "gpa"); + key->info = evsel__intval(evsel, sample, "type"); } #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 @@ -48,7 +48,7 @@ static bool mmio_event_begin(struct evsel *evsel, /* MMIO write begin event in kernel. */ if (!strcmp(evsel->name, "kvm:kvm_mmio") && - perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { + evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) { mmio_event_get_key(evsel, sample, key); return true; } @@ -65,7 +65,7 @@ static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample, /* MMIO read end event in kernel.*/ if (!strcmp(evsel->name, "kvm:kvm_mmio") && - perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { + evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) { mmio_event_get_key(evsel, sample, key); return true; } @@ -94,8 +94,8 @@ static void ioport_event_get_key(struct evsel *evsel, struct perf_sample *sample, struct event_key *key) { - key->key = perf_evsel__intval(evsel, sample, "port"); - key->info = perf_evsel__intval(evsel, sample, "rw"); + key->key = evsel__intval(evsel, sample, "port"); + key->info = evsel__intval(evsel, sample, "rw"); } static bool ioport_event_begin(struct evsel *evsel, diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index e4e321b6f883..768e408757a0 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -6,9 +6,10 @@ perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o - perf-y += epoll-wait.o perf-y += epoll-ctl.o +perf-y += synthesize.o +perf-y += kallsyms-parse.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 4aa6de1aa67d..61cae4966cae 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -41,9 +41,10 @@ int bench_futex_wake_parallel(int argc, const char **argv); int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv); - int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); +int bench_synthesize(int argc, const char **argv); +int bench_kallsyms_parse(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index f938c585d512..cf797362675b 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv) qsort(worker, nthreads, sizeof(struct worker), cmpworker); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 65eebe06c04d..915bf3da7ce2 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); if (!silent) { if (nfutexes == 1) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 89fd8f325f38..bb25d8beb3b8 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops / bench__runtime.tv_sec; + unsigned long t = bench__runtime.tv_sec > 0 ? + worker[i].ops / bench__runtime.tv_sec : 0; update_stats(&throughput_stats, t); if (!silent) diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c new file mode 100644 index 000000000000..2b0d0f980ae9 --- /dev/null +++ b/tools/perf/bench/kallsyms-parse.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark of /proc/kallsyms parsing. + * + * Copyright 2020 Google LLC. + */ +#include <stdlib.h> +#include "bench.h" +#include "../util/stat.h" +#include <linux/time64.h> +#include <subcmd/parse-options.h> +#include <symbol/kallsyms.h> + +static unsigned int iterations = 100; + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals kallsyms-parse <options>", + NULL +}; + +static int bench_process_symbol(void *arg __maybe_unused, + const char *name __maybe_unused, + char type __maybe_unused, + u64 start __maybe_unused) +{ + return 0; +} + +static int do_kallsyms_parse(void) +{ + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev; + int err; + struct stats time_stats; + + init_stats(&time_stats); + + for (i = 0; i < iterations; i++) { + gettimeofday(&start, NULL); + err = kallsyms__parse("/proc/kallsyms", NULL, + bench_process_symbol); + if (err) + return err; + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + } + + time_average = avg_stats(&time_stats) / USEC_PER_MSEC; + time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; + printf(" Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n", + time_average, time_stddev); + return 0; +} + +int bench_kallsyms_parse(int argc, const char **argv) +{ + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + return do_kallsyms_parse(); +} diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c new file mode 100644 index 000000000000..8d624aea1c5e --- /dev/null +++ b/tools/perf/bench/synthesize.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark synthesis of perf events such as at the start of a 'perf + * record'. Synthesis is done on the current process and the 'dummy' event + * handlers are invoked that support dump_trace but otherwise do nothing. + * + * Copyright 2019 Google LLC. + */ +#include <stdio.h> +#include "bench.h" +#include "../util/debug.h" +#include "../util/session.h" +#include "../util/stat.h" +#include "../util/synthetic-events.h" +#include "../util/target.h" +#include "../util/thread_map.h" +#include "../util/tool.h" +#include "../util/util.h" +#include <linux/atomic.h> +#include <linux/err.h> +#include <linux/time64.h> +#include <subcmd/parse-options.h> + +static unsigned int min_threads = 1; +static unsigned int max_threads = UINT_MAX; +static unsigned int single_iterations = 10000; +static unsigned int multi_iterations = 10; +static bool run_st; +static bool run_mt; + +static const struct option options[] = { + OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"), + OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"), + OPT_UINTEGER('m', "min-threads", &min_threads, + "Minimum number of threads in multithreaded bench"), + OPT_UINTEGER('M', "max-threads", &max_threads, + "Maximum number of threads in multithreaded bench"), + OPT_UINTEGER('i', "single-iterations", &single_iterations, + "Number of iterations used to compute single-threaded average"), + OPT_UINTEGER('I', "multi-iterations", &multi_iterations, + "Number of iterations used to compute multi-threaded average"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals synthesize <options>", + NULL +}; + +static atomic_t event_count; + +static int process_synthesized_event(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + atomic_inc(&event_count); + return 0; +} + +static int do_run_single_threaded(struct perf_session *session, + struct perf_thread_map *threads, + struct target *target, bool data_mmap) +{ + const unsigned int nr_threads_synthesize = 1; + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev, event_average, event_stddev; + int err; + struct stats time_stats, event_stats; + + init_stats(&time_stats); + init_stats(&event_stats); + + for (i = 0; i < single_iterations; i++) { + atomic_set(&event_count, 0); + gettimeofday(&start, NULL); + err = __machine__synthesize_threads(&session->machines.host, + NULL, + target, threads, + process_synthesized_event, + data_mmap, + nr_threads_synthesize); + if (err) + return err; + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&event_stats, atomic_read(&event_count)); + } + + time_average = avg_stats(&time_stats); + time_stddev = stddev_stats(&time_stats); + printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n", + data_mmap ? "data " : "", time_average, time_stddev); + + event_average = avg_stats(&event_stats); + event_stddev = stddev_stats(&event_stats); + printf(" Average num. events: %.3f (+- %.3f)\n", + event_average, event_stddev); + + printf(" Average time per event %.3f usec\n", + time_average / event_average); + return 0; +} + +static int run_single_threaded(void) +{ + struct perf_session *session; + struct target target = { + .pid = "self", + }; + struct perf_thread_map *threads; + int err; + + perf_set_singlethreaded(); + session = perf_session__new(NULL, false, NULL); + if (IS_ERR(session)) { + pr_err("Session creation failed.\n"); + return PTR_ERR(session); + } + threads = thread_map__new_by_pid(getpid()); + if (!threads) { + pr_err("Thread map creation failed.\n"); + err = -ENOMEM; + goto err_out; + } + + puts( +"Computing performance of single threaded perf event synthesis by\n" +"synthesizing events on the perf process itself:"); + + err = do_run_single_threaded(session, threads, &target, false); + if (err) + goto err_out; + + err = do_run_single_threaded(session, threads, &target, true); + +err_out: + if (threads) + perf_thread_map__put(threads); + + perf_session__delete(session); + return err; +} + +static int do_run_multi_threaded(struct target *target, + unsigned int nr_threads_synthesize) +{ + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev, event_average, event_stddev; + int err; + struct stats time_stats, event_stats; + struct perf_session *session; + + init_stats(&time_stats); + init_stats(&event_stats); + for (i = 0; i < multi_iterations; i++) { + session = perf_session__new(NULL, false, NULL); + if (!session) + return -ENOMEM; + + atomic_set(&event_count, 0); + gettimeofday(&start, NULL); + err = __machine__synthesize_threads(&session->machines.host, + NULL, + target, NULL, + process_synthesized_event, + false, + nr_threads_synthesize); + if (err) { + perf_session__delete(session); + return err; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&event_stats, atomic_read(&event_count)); + perf_session__delete(session); + } + + time_average = avg_stats(&time_stats); + time_stddev = stddev_stats(&time_stats); + printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + event_average = avg_stats(&event_stats); + event_stddev = stddev_stats(&event_stats); + printf(" Average num. events: %.3f (+- %.3f)\n", + event_average, event_stddev); + + printf(" Average time per event %.3f usec\n", + time_average / event_average); + return 0; +} + +static int run_multi_threaded(void) +{ + struct target target = { + .cpu_list = "0" + }; + unsigned int nr_threads_synthesize; + int err; + + if (max_threads == UINT_MAX) + max_threads = sysconf(_SC_NPROCESSORS_ONLN); + + puts( +"Computing performance of multi threaded perf event synthesis by\n" +"synthesizing events on CPU 0:"); + + for (nr_threads_synthesize = min_threads; + nr_threads_synthesize <= max_threads; + nr_threads_synthesize++) { + if (nr_threads_synthesize == 1) + perf_set_singlethreaded(); + else + perf_set_multithreaded(); + + printf(" Number of synthesis threads: %u\n", + nr_threads_synthesize); + + err = do_run_multi_threaded(&target, nr_threads_synthesize); + if (err) + return err; + } + perf_set_singlethreaded(); + return 0; +} + +int bench_synthesize(int argc, const char **argv) +{ + int err = 0; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + /* + * If neither single threaded or multi-threaded are specified, default + * to running just single threaded. + */ + if (!run_st && !run_mt) + run_st = true; + + if (run_st) + err = run_single_threaded(); + + if (!err && run_mt) + err = run_multi_threaded(); + + return err; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6c0a0412502e..d3e5a84f87a2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -212,11 +212,9 @@ static bool has_annotation(struct perf_annotate *ann) return ui__has_annotation() || ann->use_stdio2; } -static int perf_evsel__add_sample(struct evsel *evsel, - struct perf_sample *sample, - struct addr_location *al, - struct perf_annotate *ann, - struct machine *machine) +static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample, + struct addr_location *al, struct perf_annotate *ann, + struct machine *machine) { struct hists *hists = evsel__hists(evsel); struct hist_entry *he; @@ -278,7 +276,7 @@ static int process_sample_event(struct perf_tool *tool, goto out_put; if (!al.filtered && - perf_evsel__add_sample(evsel, sample, &al, ann, machine)) { + evsel__add_sample(evsel, sample, &al, ann, machine)) { pr_warning("problem incrementing symbol count, " "skipping event\n"); ret = -1; @@ -433,11 +431,10 @@ static int __cmd_annotate(struct perf_annotate *ann) total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); /* Don't sort callchain */ - perf_evsel__reset_sample_bit(pos, CALLCHAIN); + evsel__reset_sample_bit(pos, CALLCHAIN); perf_evsel__output_resort(pos, NULL); - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) continue; hists__find_annotations(hists, pos, ann); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index c06fe21c8613..083273209c88 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -76,6 +76,12 @@ static struct bench epoll_benchmarks[] = { }; #endif // HAVE_EVENTFD +static struct bench internals_benchmarks[] = { + { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, + { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse }, + { NULL, NULL, NULL } +}; + struct collection { const char *name; const char *summary; @@ -92,6 +98,7 @@ static struct collection collections[] = { #ifdef HAVE_EVENTFD {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, #endif + { "internals", "Perf-internals benchmarks", internals_benchmarks }, { "all", "All benchmarks", NULL }, { NULL, NULL, NULL } }; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 246ac0b4d54f..1baf4cae086f 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -95,6 +95,7 @@ struct perf_c2c { bool use_stdio; bool stats_only; bool symbol_full; + bool stitch_lbr; /* HITM shared clines stats */ struct c2c_stats hitm_stats; @@ -273,6 +274,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } + if (c2c.stitch_lbr) + al.thread->lbr_stitch_enable = true; + ret = sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, sysctl_perf_event_max_stack); if (ret) @@ -1705,7 +1709,7 @@ static struct c2c_dimension *get_dimension(const char *name) if (!strcmp(dim->name, name)) return dim; - }; + } return NULL; } @@ -1921,7 +1925,7 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) FILTER_HITM(tot_hitm); default: break; - }; + } #undef FILTER_HITM @@ -2255,8 +2259,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session) fprintf(out, "=================================================\n"); evlist__for_each_entry(evlist, evsel) { - fprintf(out, "%-36s: %s\n", first ? " Events" : "", - perf_evsel__name(evsel)); + fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel)); first = false; } fprintf(out, " Cachelines sort on : %s HITMs\n", @@ -2601,6 +2604,12 @@ static int setup_callchain(struct evlist *evlist) } } + if (c2c.stitch_lbr && (mode != CALLCHAIN_LBR)) { + ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" + "Please apply --call-graph lbr when recording.\n"); + c2c.stitch_lbr = false; + } + callchain_param.record_mode = mode; callchain_param.min_percent = 0; return 0; @@ -2752,6 +2761,8 @@ static int perf_c2c__report(int argc, const char **argv) OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", "coalesce fields: pid,tid,iaddr,dso"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), + OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPT_PARENT(c2c_options), OPT_END() }; @@ -2947,7 +2958,7 @@ static int perf_c2c__record(int argc, const char **argv) rec_argv[i++] = "-e"; rec_argv[i++] = perf_mem_events__name(j); - }; + } if (all_user) rec_argv[i++] = "--all-user"; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index c94a002f295e..f8c9bdd8269a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -467,7 +467,7 @@ static struct evsel *evsel_match(struct evsel *evsel, struct evsel *e; evlist__for_each_entry(evlist, e) { - if (perf_evsel__match2(evsel, e)) + if (evsel__match2(evsel, e)) return e; } @@ -981,7 +981,7 @@ static void data_process(void) if (!quiet) { fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n", - perf_evsel__name(evsel_base)); + evsel__name(evsel_base)); } first = false; @@ -990,7 +990,7 @@ static void data_process(void) data__fprintf(); /* Don't sort callchain for perf diff */ - perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + evsel__reset_sample_bit(evsel_base, CALLCHAIN); hists__process(hists_base); } @@ -1562,7 +1562,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, default: BUG_ON(1); - }; + } } static void diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index d5adc417a4ca..55eda54240fb 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -284,10 +284,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) .events = POLLIN, }; - if (!perf_cap__capable(CAP_SYS_ADMIN)) { + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { pr_err("ftrace only works for %s!\n", #ifdef HAVE_LIBCAP_SUPPORT - "users with the SYS_ADMIN capability" + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" #else "root" #endif diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 7e124a7b8bfd..53932db97a79 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -536,7 +536,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool, union perf_event *event_sw; struct perf_sample sample_sw; struct perf_inject *inject = container_of(tool, struct perf_inject, tool); - u32 pid = perf_evsel__intval(evsel, sample, "pid"); + u32 pid = evsel__intval(evsel, sample, "pid"); list_for_each_entry(ent, &inject->samples, node) { if (pid == ent->tid) @@ -546,7 +546,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool, return 0; found: event_sw = &ent->event[0]; - perf_evsel__parse_sample(evsel, event_sw, &sample_sw); + evsel__parse_sample(evsel, event_sw, &sample_sw); sample_sw.period = sample->period; sample_sw.time = sample->time; @@ -561,11 +561,10 @@ static void sig_handler(int sig __maybe_unused) session_done = 1; } -static int perf_evsel__check_stype(struct evsel *evsel, - u64 sample_type, const char *sample_msg) +static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg) { struct perf_event_attr *attr = &evsel->core.attr; - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!(attr->sample_type & sample_type)) { pr_err("Samples for %s event do not have %s attribute set.", @@ -622,10 +621,10 @@ static int __cmd_inject(struct perf_inject *inject) struct evsel *evsel; evlist__for_each_entry(session->evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) { - if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) + if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID")) return -EINVAL; evsel->handler = perf_inject__sched_switch; @@ -684,14 +683,14 @@ static int __cmd_inject(struct perf_inject *inject) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); - if (inject->itrace_synth_opts.last_branch) + if (inject->itrace_synth_opts.last_branch || + inject->itrace_synth_opts.add_last_branch) perf_header__set_feat(&session->header, HEADER_BRANCH_STACK); evsel = perf_evlist__id2evsel_strict(session->evlist, inject->aux_id); if (evsel) { - pr_debug("Deleting %s\n", - perf_evsel__name(evsel)); + pr_debug("Deleting %s\n", evsel__name(evsel)); evlist__remove(session->evlist, evsel); evsel__delete(evsel); } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 003c85f5f56c..38a5ab683ebc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -169,13 +169,12 @@ static int insert_caller_stat(unsigned long call_site, return 0; } -static int perf_evsel__process_alloc_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample) { - unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"), - call_site = perf_evsel__intval(evsel, sample, "call_site"); - int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"), - bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc"); + unsigned long ptr = evsel__intval(evsel, sample, "ptr"), + call_site = evsel__intval(evsel, sample, "call_site"); + int bytes_req = evsel__intval(evsel, sample, "bytes_req"), + bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc"); if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) || insert_caller_stat(call_site, bytes_req, bytes_alloc)) @@ -188,14 +187,13 @@ static int perf_evsel__process_alloc_event(struct evsel *evsel, return 0; } -static int perf_evsel__process_alloc_node_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) { - int ret = perf_evsel__process_alloc_event(evsel, sample); + int ret = evsel__process_alloc_event(evsel, sample); if (!ret) { int node1 = cpu__get_node(sample->cpu), - node2 = perf_evsel__intval(evsel, sample, "node"); + node2 = evsel__intval(evsel, sample, "node"); if (node1 != node2) nr_cross_allocs++; @@ -232,10 +230,9 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, return NULL; } -static int perf_evsel__process_free_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample) { - unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"); + unsigned long ptr = evsel__intval(evsel, sample, "ptr"); struct alloc_stat *s_alloc, *s_caller; s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); @@ -784,13 +781,12 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample, return 0; } -static int perf_evsel__process_page_alloc_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample) { u64 page; - unsigned int order = perf_evsel__intval(evsel, sample, "order"); - unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); - unsigned int migrate_type = perf_evsel__intval(evsel, sample, + unsigned int order = evsel__intval(evsel, sample, "order"); + unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags"); + unsigned int migrate_type = evsel__intval(evsel, sample, "migratetype"); u64 bytes = kmem_page_size << order; u64 callsite; @@ -802,9 +798,9 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel, }; if (use_pfn) - page = perf_evsel__intval(evsel, sample, "pfn"); + page = evsel__intval(evsel, sample, "pfn"); else - page = perf_evsel__intval(evsel, sample, "page"); + page = evsel__intval(evsel, sample, "page"); nr_page_allocs++; total_page_alloc_bytes += bytes; @@ -857,11 +853,10 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel, return 0; } -static int perf_evsel__process_page_free_event(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample) { u64 page; - unsigned int order = perf_evsel__intval(evsel, sample, "order"); + unsigned int order = evsel__intval(evsel, sample, "order"); u64 bytes = kmem_page_size << order; struct page_stat *pstat; struct page_stat this = { @@ -869,9 +864,9 @@ static int perf_evsel__process_page_free_event(struct evsel *evsel, }; if (use_pfn) - page = perf_evsel__intval(evsel, sample, "pfn"); + page = evsel__intval(evsel, sample, "pfn"); else - page = perf_evsel__intval(evsel, sample, "page"); + page = evsel__intval(evsel, sample, "page"); nr_page_frees++; total_page_free_bytes += bytes; @@ -1371,15 +1366,15 @@ static int __cmd_kmem(struct perf_session *session) struct evsel *evsel; const struct evsel_str_handler kmem_tracepoints[] = { /* slab allocator */ - { "kmem:kmalloc", perf_evsel__process_alloc_event, }, - { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, - { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, - { "kmem:kfree", perf_evsel__process_free_event, }, - { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, + { "kmem:kmalloc", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kfree", evsel__process_free_event, }, + { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ - { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, - { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, + { "kmem:mm_page_alloc", evsel__process_page_alloc_event, }, + { "kmem:mm_page_free", evsel__process_page_free_event, }, }; if (!perf_session__has_traces(session, "kmem record")) @@ -1391,8 +1386,8 @@ static int __cmd_kmem(struct perf_session *session) } evlist__for_each_entry(session->evlist, evsel) { - if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && - perf_evsel__field(evsel, "pfn")) { + if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") && + evsel__field(evsel, "pfn")) { use_pfn = true; break; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 577af4f3297a..95a77058023e 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -69,7 +69,7 @@ void exit_event_get_key(struct evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); + key->key = evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct evsel *evsel) @@ -416,8 +416,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return NULL; } - vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, - vcpu_id_str); + vcpu_record->vcpu_id = evsel__intval(evsel, sample, vcpu_id_str); thread__set_priv(thread, vcpu_record); } @@ -1033,16 +1032,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) struct perf_event_attr *attr = &pos->core.attr; /* make sure these *are* set */ - perf_evsel__set_sample_bit(pos, TID); - perf_evsel__set_sample_bit(pos, TIME); - perf_evsel__set_sample_bit(pos, CPU); - perf_evsel__set_sample_bit(pos, RAW); + evsel__set_sample_bit(pos, TID); + evsel__set_sample_bit(pos, TIME); + evsel__set_sample_bit(pos, CPU); + evsel__set_sample_bit(pos, RAW); /* make sure these are *not*; want as small a sample as possible */ - perf_evsel__reset_sample_bit(pos, PERIOD); - perf_evsel__reset_sample_bit(pos, IP); - perf_evsel__reset_sample_bit(pos, CALLCHAIN); - perf_evsel__reset_sample_bit(pos, ADDR); - perf_evsel__reset_sample_bit(pos, READ); + evsel__reset_sample_bit(pos, PERIOD); + evsel__reset_sample_bit(pos, IP); + evsel__reset_sample_bit(pos, CALLCHAIN); + evsel__reset_sample_bit(pos, ADDR); + evsel__reset_sample_bit(pos, READ); attr->mmap = 0; attr->comm = 0; attr->task = 0; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 474dfd59d7eb..f0a1dbacb46c 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -48,7 +48,7 @@ struct lock_stat { struct rb_node rb; /* used for sorting */ /* - * FIXME: perf_evsel__intval() returns u64, + * FIXME: evsel__intval() returns u64, * so address of lockdep_map should be dealed as 64bit. * Is there more better solution? */ @@ -404,9 +404,9 @@ static int report_lock_acquire_event(struct evsel *evsel, struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); - int flag = perf_evsel__intval(evsel, sample, "flag"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); + int flag = evsel__intval(evsel, sample, "flag"); memcpy(&addr, &tmp, sizeof(void *)); @@ -477,8 +477,8 @@ static int report_lock_acquired_event(struct evsel *evsel, struct thread_stat *ts; struct lock_seq_stat *seq; u64 contended_term; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); memcpy(&addr, &tmp, sizeof(void *)); @@ -539,8 +539,8 @@ static int report_lock_contended_event(struct evsel *evsel, struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); memcpy(&addr, &tmp, sizeof(void *)); @@ -594,8 +594,8 @@ static int report_lock_release_event(struct evsel *evsel, struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; - const char *name = perf_evsel__strval(evsel, sample, "name"); - u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr"); + const char *name = evsel__strval(evsel, sample, "name"); + u64 tmp = evsel__intval(evsel, sample, "lockdep_addr"); memcpy(&addr, &tmp, sizeof(void *)); @@ -657,32 +657,28 @@ static struct trace_lock_handler report_lock_ops = { static struct trace_lock_handler *trace_handler; -static int perf_evsel__process_lock_acquire(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->acquire_event) return trace_handler->acquire_event(evsel, sample); return 0; } -static int perf_evsel__process_lock_acquired(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->acquired_event) return trace_handler->acquired_event(evsel, sample); return 0; } -static int perf_evsel__process_lock_contended(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->contended_event) return trace_handler->contended_event(evsel, sample); return 0; } -static int perf_evsel__process_lock_release(struct evsel *evsel, - struct perf_sample *sample) +static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample) { if (trace_handler->release_event) return trace_handler->release_event(evsel, sample); @@ -775,7 +771,7 @@ static void dump_threads(void) pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); node = rb_next(node); thread__put(t); - }; + } } static void dump_map(void) @@ -849,10 +845,10 @@ static void sort_result(void) } static const struct evsel_str_handler lock_tracepoints[] = { - { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ - { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; static bool force; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index a13f5817d6fc..68a7eb84561a 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -123,7 +123,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "-e"; rec_argv[i++] = perf_mem_events__name(j); - }; + } if (all_user) rec_argv[i++] = "--all-user"; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1ab349abe904..e4efdbf1a81e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -34,6 +34,7 @@ #include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" +#include "util/perf_api_probe.h" #include "util/llvm-utils.h" #include "util/bpf-loader.h" #include "util/trigger.h" @@ -43,6 +44,7 @@ #include "util/time-utils.h" #include "util/units.h" #include "util/bpf-event.h" +#include "util/util.h" #include "asm/bug.h" #include "perf.h" @@ -50,6 +52,7 @@ #include <inttypes.h> #include <locale.h> #include <poll.h> +#include <pthread.h> #include <unistd.h> #include <sched.h> #include <signal.h> @@ -84,7 +87,10 @@ struct record { struct auxtrace_record *itr; struct evlist *evlist; struct perf_session *session; + struct evlist *sb_evlist; + pthread_t thread_id; int realtime_prio; + bool switch_output_event_set; bool no_buildid; bool no_buildid_set; bool no_buildid_cache; @@ -503,6 +509,20 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, NULL, event, event->header.size); } +static int process_locked_synthesized_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER; + int ret; + + pthread_mutex_lock(&synth_lock); + ret = process_synthesized_event(tool, event, sample, machine); + pthread_mutex_unlock(&synth_lock); + return ret; +} + static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) { struct record *rec = to; @@ -825,7 +845,7 @@ static int record__open(struct record *rec) evlist__for_each_entry(evlist, pos) { try_again: if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) { - if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { + if (evsel__fallback(pos, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; @@ -837,8 +857,7 @@ try_again: goto try_again; } rc = -errno; - perf_evsel__open_strerror(pos, &opts->target, - errno, msg, sizeof(msg)); + evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); goto out; } @@ -859,7 +878,7 @@ try_again: if (perf_evlist__apply_filters(evlist, &pos)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", - pos->filter, perf_evsel__name(pos), errno, + pos->filter, evsel__name(pos), errno, str_error_r(errno, msg, sizeof(msg))); rc = -1; goto out; @@ -1288,6 +1307,7 @@ static int record__synthesize(struct record *rec, bool tail) struct perf_tool *tool = &rec->tool; int fd = perf_data__fd(data); int err = 0; + event_op f = process_synthesized_event; if (rec->opts.tail_synthesize != tail) return 0; @@ -1402,13 +1422,67 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize cgroup events.\n"); + if (rec->opts.nr_threads_synthesize > 1) { + perf_set_multithreaded(); + f = process_locked_synthesized_event; + } + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, - process_synthesized_event, opts->sample_address, - 1); + f, opts->sample_address, + rec->opts.nr_threads_synthesize); + + if (rec->opts.nr_threads_synthesize > 1) + perf_set_singlethreaded(); + out: return err; } +static int record__process_signal_event(union perf_event *event __maybe_unused, void *data) +{ + struct record *rec = data; + pthread_kill(rec->thread_id, SIGUSR2); + return 0; +} + +static int record__setup_sb_evlist(struct record *rec) +{ + struct record_opts *opts = &rec->opts; + + if (rec->sb_evlist != NULL) { + /* + * We get here if --switch-output-event populated the + * sb_evlist, so associate a callback that will send a SIGUSR2 + * to the main thread. + */ + evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec); + rec->thread_id = pthread_self(); + } + + if (!opts->no_bpf_event) { + if (rec->sb_evlist == NULL) { + rec->sb_evlist = evlist__new(); + + if (rec->sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + return -1; + } + } + + if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + return -1; + } + } + + if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + + return 0; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1420,7 +1494,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; - struct evlist *sb_evlist = NULL; int fd; float ratio = 0; @@ -1546,21 +1619,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = -1; if (!rec->no_buildid && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) { pr_err("Couldn't generate buildids. " "Use --no-buildid to profile anyway.\n"); - err = -1; goto out_child; } - if (!opts->no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &session->header.env); - - if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { - pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); - opts->no_bpf_event = true; - } + err = record__setup_sb_evlist(rec); + if (err) + goto out_child; err = record__synthesize(rec, false); if (err < 0) @@ -1831,7 +1900,7 @@ out_delete_session: perf_session__delete(session); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(rec->sb_evlist); return status; } @@ -2142,10 +2211,19 @@ static int switch_output_setup(struct record *rec) }; unsigned long val; + /* + * If we're using --switch-output-events, then we imply its + * --switch-output=signal, as we'll send a SIGUSR2 from the side band + * thread to its parent. + */ + if (rec->switch_output_event_set) + goto do_signal; + if (!s->set) return 0; if (!strcmp(s->str, "signal")) { +do_signal: s->signal = true; pr_debug("switch-output with SIGUSR2 signal\n"); goto enabled; @@ -2232,6 +2310,7 @@ static struct record record = { .default_per_cpu = true, }, .mmap_flush = MMAP_FLUSH_DEFAULT, + .nr_threads_synthesize = 1, }, .tool = { .sample = process_sample_event, @@ -2402,6 +2481,9 @@ static struct option __record_options[] = { &record.switch_output.set, "signal or size[BKMG] or time[smhd]", "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), + OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event", + "switch output event selector. use 'perf list' to list available events", + parse_events_option_new_evlist), OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, "Limit number of switch output generated files"), OPT_BOOLEAN(0, "dry-run", &dry_run, @@ -2421,6 +2503,9 @@ static struct option __record_options[] = { #endif OPT_CALLBACK(0, "max-size", &record.output_max_size, "size", "Limit the maximum size of the output file", parse_output_max_size), + OPT_UINTEGER(0, "num-thread-synthesize", + &record.opts.nr_threads_synthesize, + "number of threads to run for event synthesis"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 26d8fc27e427..ba63390246c2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -84,6 +84,7 @@ struct report { bool header_only; bool nonany_branch_mode; bool group_set; + bool stitch_lbr; int max_stack; struct perf_read_values show_threads_values; struct annotation_options annotation_opts; @@ -267,6 +268,9 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (rep->stitch_lbr) + al.thread->lbr_stitch_enable = true; + if (symbol_conf.hide_unresolved && al.sym == NULL) goto out_put; @@ -317,7 +321,7 @@ static int process_read_event(struct perf_tool *tool, struct report *rep = container_of(tool, struct report, tool); if (rep->show_threads) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, evsel->idx, @@ -339,12 +343,14 @@ static int report__setup_sample_type(struct report *rep) bool is_pipe = perf_data__is_pipe(session->data); if (session->itrace_synth_opts->callchain || + session->itrace_synth_opts->add_callchain || (!is_pipe && perf_header__has_feat(&session->header, HEADER_AUXTRACE) && !session->itrace_synth_opts->set)) sample_type |= PERF_SAMPLE_CALLCHAIN; - if (session->itrace_synth_opts->last_branch) + if (session->itrace_synth_opts->last_branch || + session->itrace_synth_opts->add_last_branch) sample_type |= PERF_SAMPLE_BRANCH_STACK; if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { @@ -407,6 +413,12 @@ static int report__setup_sample_type(struct report *rep) callchain_param.record_mode = CALLCHAIN_FP; } + if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { + ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" + "Please apply --call-graph lbr when recording.\n"); + rep->stitch_lbr = false; + } + /* ??? handle more cases than just ANY? */ if (!(perf_evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) @@ -447,10 +459,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report nr_events = hists->stats.total_non_filtered_period; } - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - perf_evsel__group_desc(evsel, buf, size); + evsel__group_desc(evsel, buf, size); evname = buf; for_each_group_member(pos, evsel) { @@ -525,10 +537,9 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - const char *evname = perf_evsel__name(pos); + const char *evname = evsel__name(pos); - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) continue; hists__fprintf_nr_sample_events(hists, rep, evname, stdout); @@ -670,8 +681,7 @@ static int report__collapse_hists(struct report *rep) break; /* Non-group events are considered as leader */ - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) { + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) { struct hists *leader_hists = evsel__hists(pos->leader); hists__match(leader_hists, hists); @@ -1257,6 +1267,8 @@ int cmd_report(int argc, const char **argv) "Show full source file name path for source lines"), OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, "Show callgraph from reference event"), + OPT_BOOLEAN(0, "stitch-lbr", &report.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPT_INTEGER(0, "socket-filter", &report.socket_filter, "only show processor socket that match with this filter"), OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, @@ -1332,7 +1344,7 @@ int cmd_report(int argc, const char **argv) if (symbol_conf.cumulate_callchain && !callchain_param.order_set) callchain_param.order = ORDER_CALLER; - if (itrace_synth_opts.callchain && + if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) && (int)itrace_synth_opts.callchain_sz > report.max_stack) report.max_stack = itrace_synth_opts.callchain_sz; @@ -1380,7 +1392,7 @@ repeat: goto error; } - if (itrace_synth_opts.last_branch) + if (itrace_synth_opts.last_branch || itrace_synth_opts.add_last_branch) has_br_stack = true; if (has_br_stack && branch_call_mode) @@ -1400,7 +1412,7 @@ repeat: } if (branch_call_mode) { callchain_param.key = CCKEY_ADDRESS; - callchain_param.branch_callstack = 1; + callchain_param.branch_callstack = true; symbol_conf.use_callchain = true; callchain_register_param(&callchain_param); if (sort_order == NULL) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 82fcc2c15fe4..459e4229945e 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -811,8 +811,8 @@ replay_wakeup_event(struct perf_sched *sched, struct evsel *evsel, struct perf_sample *sample, struct machine *machine __maybe_unused) { - const char *comm = perf_evsel__strval(evsel, sample, "comm"); - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const char *comm = evsel__strval(evsel, sample, "comm"); + const u32 pid = evsel__intval(evsel, sample, "pid"); struct task_desc *waker, *wakee; if (verbose > 0) { @@ -833,11 +833,11 @@ static int replay_switch_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine __maybe_unused) { - const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"), - *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); - const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), - next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"), + *next_comm = evsel__strval(evsel, sample, "next_comm"); + const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), + next_pid = evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = evsel__intval(evsel, sample, "prev_state"); struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = sample->cpu; @@ -1106,9 +1106,9 @@ static int latency_switch_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), - next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), + next_pid = evsel__intval(evsel, sample, "next_pid"); + const u64 prev_state = evsel__intval(evsel, sample, "prev_state"); struct work_atoms *out_events, *in_events; struct thread *sched_out, *sched_in; u64 timestamp0, timestamp = sample->time; @@ -1176,8 +1176,8 @@ static int latency_runtime_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); - const u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); + const u32 pid = evsel__intval(evsel, sample, "pid"); + const u64 runtime = evsel__intval(evsel, sample, "runtime"); struct thread *thread = machine__findnew_thread(machine, -1, pid); struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); u64 timestamp = sample->time; @@ -1211,7 +1211,7 @@ static int latency_wakeup_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); struct work_atoms *atoms; struct work_atom *atom; struct thread *wakee; @@ -1272,7 +1272,7 @@ static int latency_migrate_task_event(struct perf_sched *sched, struct perf_sample *sample, struct machine *machine) { - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); u64 timestamp = sample->time; struct work_atoms *atoms; struct work_atom *atom; @@ -1526,7 +1526,7 @@ map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, struct perf_sample *sample, struct machine *machine) { - const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); struct thread *sched_in; struct thread_runtime *tr; int new_shortname; @@ -1670,8 +1670,8 @@ static int process_sched_switch_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int this_cpu = sample->cpu, err = 0; - u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"), - next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), + next_pid = evsel__intval(evsel, sample, "next_pid"); if (sched->curr_pid[this_cpu] != (u32)-1) { /* @@ -1848,7 +1848,7 @@ static inline void print_sched_time(unsigned long long nsecs, int width) * returns runtime data for event, allocating memory for it the * first time it is used. */ -static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel) +static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel) { struct evsel_runtime *r = evsel->priv; @@ -1863,10 +1863,9 @@ static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel) /* * save last time event was seen per cpu */ -static void perf_evsel__save_time(struct evsel *evsel, - u64 timestamp, u32 cpu) +static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu) { - struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + struct evsel_runtime *r = evsel__get_runtime(evsel); if (r == NULL) return; @@ -1890,9 +1889,9 @@ static void perf_evsel__save_time(struct evsel *evsel, } /* returns last time this event was seen on the given cpu */ -static u64 perf_evsel__get_time(struct evsel *evsel, u32 cpu) +static u64 evsel__get_time(struct evsel *evsel, u32 cpu) { - struct evsel_runtime *r = perf_evsel__get_runtime(evsel); + struct evsel_runtime *r = evsel__get_runtime(evsel); if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu)) return 0; @@ -2004,8 +2003,8 @@ static void timehist_print_sample(struct perf_sched *sched, u64 t, int state) { struct thread_runtime *tr = thread__priv(thread); - const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm"); - const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid"); + const char *next_comm = evsel__strval(evsel, sample, "next_comm"); + const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); u32 max_cpus = sched->max_cpu + 1; char tstr[64]; char nstr[30]; @@ -2136,8 +2135,8 @@ static bool is_idle_sample(struct perf_sample *sample, struct evsel *evsel) { /* pid 0 == swapper == idle task */ - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0) - return perf_evsel__intval(evsel, sample, "prev_pid") == 0; + if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0) + return evsel__intval(evsel, sample, "prev_pid") == 0; return sample->pid == 0; } @@ -2334,7 +2333,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, itr->last_thread = thread; /* copy task callchain when entering to idle */ - if (perf_evsel__intval(evsel, sample, "next_pid") == 0) + if (evsel__intval(evsel, sample, "next_pid") == 0) save_idle_callchain(sched, itr, sample); } } @@ -2355,10 +2354,10 @@ static bool timehist_skip_sample(struct perf_sched *sched, } if (sched->idle_hist) { - if (strcmp(perf_evsel__name(evsel), "sched:sched_switch")) + if (strcmp(evsel__name(evsel), "sched:sched_switch")) rc = true; - else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 && - perf_evsel__intval(evsel, sample, "next_pid") != 0) + else if (evsel__intval(evsel, sample, "prev_pid") != 0 && + evsel__intval(evsel, sample, "next_pid") != 0) rc = true; } @@ -2409,7 +2408,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool, struct thread *thread; struct thread_runtime *tr = NULL; /* want pid of awakened task not pid in sample */ - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); thread = machine__findnew_thread(machine, 0, pid); if (thread == NULL) @@ -2445,8 +2444,8 @@ static void timehist_print_migration_event(struct perf_sched *sched, return; max_cpus = sched->max_cpu + 1; - ocpu = perf_evsel__intval(evsel, sample, "orig_cpu"); - dcpu = perf_evsel__intval(evsel, sample, "dest_cpu"); + ocpu = evsel__intval(evsel, sample, "orig_cpu"); + dcpu = evsel__intval(evsel, sample, "dest_cpu"); thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) @@ -2493,7 +2492,7 @@ static int timehist_migrate_task_event(struct perf_tool *tool, struct thread *thread; struct thread_runtime *tr = NULL; /* want pid of migrated task not pid in sample */ - const u32 pid = perf_evsel__intval(evsel, sample, "pid"); + const u32 pid = evsel__intval(evsel, sample, "pid"); thread = machine__findnew_thread(machine, 0, pid); if (thread == NULL) @@ -2524,8 +2523,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, struct thread_runtime *tr = NULL; u64 tprev, t = sample->time; int rc = 0; - int state = perf_evsel__intval(evsel, sample, "prev_state"); - + int state = evsel__intval(evsel, sample, "prev_state"); if (machine__resolve(machine, &al, sample) < 0) { pr_err("problem processing %d event. skipping it\n", @@ -2549,7 +2547,7 @@ static int timehist_sched_change_event(struct perf_tool *tool, goto out; } - tprev = perf_evsel__get_time(evsel, sample->cpu); + tprev = evsel__get_time(evsel, sample->cpu); /* * If start time given: @@ -2632,7 +2630,7 @@ out: tr->ready_to_run = 0; } - perf_evsel__save_time(evsel, sample->time, sample->cpu); + evsel__save_time(evsel, sample->time, sample->cpu); return rc; } @@ -2942,7 +2940,7 @@ static int timehist_check_attr(struct perf_sched *sched, struct evsel_runtime *er; list_for_each_entry(evsel, &evlist->core.entries, core.node) { - er = perf_evsel__get_runtime(evsel); + er = evsel__get_runtime(evsel); if (er == NULL) { pr_err("Failed to allocate memory for evsel runtime data\n"); return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1f57a7ecdf3d..56d7bcd12671 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -273,7 +273,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, struct evsel_script *es = zalloc(sizeof(*es)); if (es != NULL) { - if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) + if (asprintf(&es->filename, "%s.%s.dump", data->file.path, evsel__name(evsel)) < 0) goto out_free; es->fp = fopen(es->filename, "w"); if (es->fp == NULL) @@ -351,10 +351,8 @@ static const char *output_field2str(enum perf_output_field field) #define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x) -static int perf_evsel__do_check_stype(struct evsel *evsel, - u64 sample_type, const char *sample_msg, - enum perf_output_field field, - bool allow_user_set) +static int evsel__do_check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg, + enum perf_output_field field, bool allow_user_set) { struct perf_event_attr *attr = &evsel->core.attr; int type = output_type(attr->type); @@ -366,7 +364,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel, if (output[type].user_set_fields & field) { if (allow_user_set) return 0; - evname = perf_evsel__name(evsel); + evname = evsel__name(evsel); pr_err("Samples for '%s' event do not have %s attribute set. " "Cannot print '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -375,7 +373,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel, /* user did not ask for it explicitly so remove from the default list */ output[type].fields &= ~field; - evname = perf_evsel__name(evsel); + evname = evsel__name(evsel); pr_debug("Samples for '%s' event do not have %s attribute set. " "Skipping '%s' field.\n", evname, sample_msg, output_field2str(field)); @@ -383,16 +381,13 @@ static int perf_evsel__do_check_stype(struct evsel *evsel, return 0; } -static int perf_evsel__check_stype(struct evsel *evsel, - u64 sample_type, const char *sample_msg, - enum perf_output_field field) +static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg, + enum perf_output_field field) { - return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field, - false); + return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false); } -static int perf_evsel__check_attr(struct evsel *evsel, - struct perf_session *session) +static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session) { struct perf_event_attr *attr = &evsel->core.attr; bool allow_user_set; @@ -404,32 +399,28 @@ static int perf_evsel__check_attr(struct evsel *evsel, HEADER_AUXTRACE); if (PRINT_FIELD(TRACE) && - !perf_session__has_traces(session, "record -R")) + !perf_session__has_traces(session, "record -R")) return -EINVAL; if (PRINT_FIELD(IP)) { - if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", - PERF_OUTPUT_IP)) + if (evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", PERF_OUTPUT_IP)) return -EINVAL; } if (PRINT_FIELD(ADDR) && - perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", - PERF_OUTPUT_ADDR, allow_user_set)) + evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", PERF_OUTPUT_ADDR, allow_user_set)) return -EINVAL; if (PRINT_FIELD(DATA_SRC) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", - PERF_OUTPUT_DATA_SRC)) + evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC)) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", - PERF_OUTPUT_WEIGHT)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && - !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { + !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { pr_err("Display of symbols requested but neither sample IP nor " "sample address\navailable. Hence, no addresses to convert " "to symbols.\n"); @@ -441,7 +432,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, return -EINVAL; } if (PRINT_FIELD(DSO) && - !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { + !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { pr_err("Display of DSO requested but no address to convert.\n"); return -EINVAL; } @@ -458,33 +449,27 @@ static int perf_evsel__check_attr(struct evsel *evsel, return -EINVAL; } if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", - PERF_OUTPUT_TID|PERF_OUTPUT_PID)) + evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID)) return -EINVAL; if (PRINT_FIELD(TIME) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", - PERF_OUTPUT_TIME)) + evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", PERF_OUTPUT_TIME)) return -EINVAL; if (PRINT_FIELD(CPU) && - perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", - PERF_OUTPUT_CPU, allow_user_set)) + evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; if (PRINT_FIELD(IREGS) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", - PERF_OUTPUT_IREGS)) + evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) return -EINVAL; if (PRINT_FIELD(UREGS) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", - PERF_OUTPUT_UREGS)) + evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", PERF_OUTPUT_UREGS)) return -EINVAL; if (PRINT_FIELD(PHYS_ADDR) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", - PERF_OUTPUT_PHYS_ADDR)) + evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) return -EINVAL; return 0; @@ -604,8 +589,6 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); } - fprintf(fp, "\n"); - return printed; } @@ -1697,6 +1680,7 @@ struct perf_script { bool show_cgroup_events; bool allocated; bool per_event_dump; + bool stitch_lbr; struct evswitch evswitch; struct perf_cpu_map *cpus; struct perf_thread_map *threads; @@ -1713,7 +1697,7 @@ static int perf_evlist__max_name_len(struct evlist *evlist) int max = 0; evlist__for_each_entry(evlist, evsel) { - int len = strlen(perf_evsel__name(evsel)); + int len = strlen(evsel__name(evsel)); max = MAX(len, max); } @@ -1887,7 +1871,7 @@ static void process_event(struct perf_script *script, fprintf(fp, "%10" PRIu64 " ", sample->period); if (PRINT_FIELD(EVNAME)) { - const char *evname = perf_evsel__name(evsel); + const char *evname = evsel__name(evsel); if (!script->name_width) script->name_width = perf_evlist__max_name_len(script->session->evlist); @@ -1923,6 +1907,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; + if (script->stitch_lbr) + al->thread->lbr_stitch_enable = true; + if (symbol_conf.use_callchain && sample->callchain && thread__resolve_callchain(al->thread, &callchain_cursor, evsel, sample, NULL, NULL, scripting_max_stack) == 0) @@ -1946,7 +1933,7 @@ static void process_event(struct perf_script *script, else if (PRINT_FIELD(BRSTACKOFF)) perf_sample__fprintf_brstackoff(sample, thread, attr, fp); - if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) + if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) perf_sample__fprintf_bpf_output(sample, fp); perf_sample__fprintf_insn(sample, attr, thread, machine, fp); @@ -1975,7 +1962,7 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_evsel__nr_cpus(counter); + int ncpus = evsel__nr_cpus(counter); int cpu, thread; static int header_printed; @@ -2001,7 +1988,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp) counts->ena, counts->run, tstamp, - perf_evsel__name(counter)); + evsel__name(counter)); } } } @@ -2040,7 +2027,7 @@ static int cleanup_scripting(void) static bool filter_cpu(struct perf_sample *sample) { - if (cpu_list) + if (cpu_list && sample->cpu != (u32)-1) return !test_bit(sample->cpu, cpu_bitmap); return false; } @@ -2138,41 +2125,59 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, return err; } -static int process_comm_event(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct machine *machine) +static int print_event_with_time(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine, + pid_t pid, pid_t tid, u64 timestamp) { - struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - int ret = -1; + struct thread *thread = NULL; - thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); - if (thread == NULL) { - pr_debug("problem processing COMM event, skipping it.\n"); - return -1; + if (evsel && !evsel->core.attr.sample_id_all) { + sample->cpu = 0; + sample->time = timestamp; + sample->pid = pid; + sample->tid = tid; } - if (perf_event__process_comm(tool, event, sample, machine) < 0) - goto out; + if (filter_cpu(sample)) + return 0; - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->comm.tid; - sample->pid = event->comm.pid; - } - if (!filter_cpu(sample)) { + if (tid != -1) + thread = machine__findnew_thread(machine, pid, tid); + + if (thread && evsel) { perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_COMM, stdout); - perf_event__fprintf(event, stdout); + event->header.type, stdout); } - ret = 0; -out: + + perf_event__fprintf(event, stdout); + thread__put(thread); - return ret; + + return 0; +} + +static int print_event(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + pid_t pid, pid_t tid) +{ + return print_event_with_time(tool, event, sample, machine, pid, tid, 0); +} + +static int process_comm_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + if (perf_event__process_comm(tool, event, sample, machine) < 0) + return -1; + + return print_event(tool, event, sample, machine, event->comm.pid, + event->comm.tid); } static int process_namespaces_event(struct perf_tool *tool, @@ -2180,37 +2185,11 @@ static int process_namespaces_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - int ret = -1; - - thread = machine__findnew_thread(machine, event->namespaces.pid, - event->namespaces.tid); - if (thread == NULL) { - pr_debug("problem processing NAMESPACES event, skipping it.\n"); - return -1; - } - if (perf_event__process_namespaces(tool, event, sample, machine) < 0) - goto out; + return -1; - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->namespaces.tid; - sample->pid = event->namespaces.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_NAMESPACES, stdout); - perf_event__fprintf(event, stdout); - } - ret = 0; -out: - thread__put(thread); - return ret; + return print_event(tool, event, sample, machine, event->namespaces.pid, + event->namespaces.tid); } static int process_cgroup_event(struct perf_tool *tool, @@ -2218,34 +2197,11 @@ static int process_cgroup_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - int ret = -1; - - thread = machine__findnew_thread(machine, sample->pid, sample->tid); - if (thread == NULL) { - pr_debug("problem processing CGROUP event, skipping it.\n"); - return -1; - } - if (perf_event__process_cgroup(tool, event, sample, machine) < 0) - goto out; + return -1; - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_CGROUP, stdout); - perf_event__fprintf(event, stdout); - } - ret = 0; -out: - thread__put(thread); - return ret; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static int process_fork_event(struct perf_tool *tool, @@ -2253,69 +2209,24 @@ static int process_fork_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (perf_event__process_fork(tool, event, sample, machine) < 0) return -1; - thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); - if (thread == NULL) { - pr_debug("problem processing FORK event, skipping it.\n"); - return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = event->fork.time; - sample->tid = event->fork.tid; - sample->pid = event->fork.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_FORK, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - - return 0; + return print_event_with_time(tool, event, sample, machine, + event->fork.pid, event->fork.tid, + event->fork.time); } static int process_exit_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine) { - int err = 0; - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - - thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); - if (thread == NULL) { - pr_debug("problem processing EXIT event, skipping it.\n"); + /* Print before 'exit' deletes anything */ + if (print_event_with_time(tool, event, sample, machine, event->fork.pid, + event->fork.tid, event->fork.time)) return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->fork.tid; - sample->pid = event->fork.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_EXIT, stdout); - perf_event__fprintf(event, stdout); - } - - if (perf_event__process_exit(tool, event, sample, machine) < 0) - err = -1; - thread__put(thread); - return err; + return perf_event__process_exit(tool, event, sample, machine); } static int process_mmap_event(struct perf_tool *tool, @@ -2323,33 +2234,11 @@ static int process_mmap_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (perf_event__process_mmap(tool, event, sample, machine) < 0) return -1; - thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid); - if (thread == NULL) { - pr_debug("problem processing MMAP event, skipping it.\n"); - return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->mmap.tid; - sample->pid = event->mmap.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, event->mmap.pid, + event->mmap.tid); } static int process_mmap2_event(struct perf_tool *tool, @@ -2357,33 +2246,11 @@ static int process_mmap2_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (perf_event__process_mmap2(tool, event, sample, machine) < 0) return -1; - thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid); - if (thread == NULL) { - pr_debug("problem processing MMAP2 event, skipping it.\n"); - return -1; - } - - if (!evsel->core.attr.sample_id_all) { - sample->cpu = 0; - sample->time = 0; - sample->tid = event->mmap2.tid; - sample->pid = event->mmap2.pid; - } - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP2, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, event->mmap2.pid, + event->mmap2.tid); } static int process_switch_event(struct perf_tool *tool, @@ -2391,10 +2258,7 @@ static int process_switch_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; @@ -2405,20 +2269,8 @@ static int process_switch_event(struct perf_tool *tool, if (!script->show_switch_events) return 0; - thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - if (thread == NULL) { - pr_debug("problem processing SWITCH event, skipping it.\n"); - return -1; - } - - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_SWITCH, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static int @@ -2427,23 +2279,8 @@ process_lost_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - struct thread *thread; - - thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - if (thread == NULL) - return -1; - - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_LOST, stdout); - perf_event__fprintf(event, stdout); - } - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static int @@ -2462,33 +2299,11 @@ process_bpf_events(struct perf_tool *tool __maybe_unused, struct perf_sample *sample, struct machine *machine) { - struct thread *thread; - struct perf_script *script = container_of(tool, struct perf_script, tool); - struct perf_session *session = script->session; - struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); - if (machine__process_ksymbol(machine, event, sample) < 0) return -1; - if (!evsel->core.attr.sample_id_all) { - perf_event__fprintf(event, stdout); - return 0; - } - - thread = machine__findnew_thread(machine, sample->pid, sample->tid); - if (thread == NULL) { - pr_debug("problem processing MMAP event, skipping it.\n"); - return -1; - } - - if (!filter_cpu(sample)) { - perf_sample__fprintf_start(sample, thread, evsel, - event->header.type, stdout); - perf_event__fprintf(event, stdout); - } - - thread__put(thread); - return 0; + return print_event(tool, event, sample, machine, sample->pid, + sample->tid); } static void sig_handler(int sig __maybe_unused) @@ -3145,7 +2960,7 @@ static int check_ev_match(char *dir_name, char *scriptname, match = 0; evlist__for_each_entry(session->evlist, pos) { - if (!strcmp(perf_evsel__name(pos), evname)) { + if (!strcmp(evsel__name(pos), evname)) { match = 1; break; } @@ -3342,6 +3157,12 @@ static void script__setup_sample_type(struct perf_script *script) else callchain_param.record_mode = CALLCHAIN_FP; } + + if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { + pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" + "Please apply --call-graph lbr when recording.\n"); + script->stitch_lbr = false; + } } static int process_stat_round_event(struct perf_session *session, @@ -3653,6 +3474,8 @@ int cmd_script(int argc, const char **argv) "file", "file saving guest os /proc/kallsyms"), OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, "file", "file saving guest os /proc/modules"), + OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -3709,7 +3532,7 @@ int cmd_script(int argc, const char **argv) return -1; } - if (itrace_synth_opts.callchain && + if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ec053dc1e35c..e0c1ad23c768 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -238,9 +238,8 @@ static int write_stat_round_event(u64 tm, u64 type) #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) -static int -perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, - struct perf_counts_values *count) +static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, + struct perf_counts_values *count) { struct perf_sample_id *sid = SID(counter, cpu, thread); @@ -259,7 +258,7 @@ static int read_single_counter(struct evsel *counter, int cpu, count->val = val; return 0; } - return perf_evsel__read_counter(counter, cpu, thread); + return evsel__read_counter(counter, cpu, thread); } /* @@ -284,7 +283,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) /* * The leader's group read loads data into its group members - * (via perf_evsel__read_counter()) and sets their count->loaded. + * (via evsel__read_counter()) and sets their count->loaded. */ if (!perf_counts__is_loaded(counter->counts, cpu, thread) && read_single_counter(counter, cpu, thread, rs)) { @@ -297,7 +296,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) perf_counts__set_loaded(counter->counts, cpu, thread, false); if (STAT_RECORD) { - if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { + if (evsel__write_stat_event(counter, cpu, thread, count)) { pr_err("failed to write stat event\n"); return -1; } @@ -306,7 +305,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) if (verbose > 1) { fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), + evsel__name(counter), cpu, count->val, count->ena, count->run); } @@ -359,6 +358,7 @@ static void process_interval(void) clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); + perf_stat__reset_shadow_per_stat(&rt_stat); read_counters(&rs); if (STAT_RECORD) { @@ -409,7 +409,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static bool perf_evsel__should_store_id(struct evsel *counter) +static bool evsel__should_store_id(struct evsel *counter) { return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID; } @@ -454,7 +454,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) errno == ENXIO) { if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", - perf_evsel__name(counter)); + evsel__name(counter)); counter->supported = false; /* * errored is a sticky flag that means one of the counter's @@ -465,7 +465,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if ((counter->leader != counter) || !(counter->leader->core.nr_members > 1)) return COUNTER_SKIP; - } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; @@ -483,8 +483,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) } } - perf_evsel__open_strerror(counter, &target, - errno, msg, sizeof(msg)); + evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); if (child_pid != -1) @@ -604,7 +603,7 @@ try_again: if (!counter->reset_group) continue; try_again_reset: - pr_debug2("reopening weak %s\n", perf_evsel__name(counter)); + pr_debug2("reopening weak %s\n", evsel__name(counter)); if (create_perf_stat_counter(counter, &stat_config, &target, counter->cpu_iter - 1) < 0) { @@ -635,14 +634,14 @@ try_again_reset: if (l > stat_config.unit_width) stat_config.unit_width = l; - if (perf_evsel__should_store_id(counter) && - perf_evsel__store_ids(counter, evsel_list)) + if (evsel__should_store_id(counter) && + evsel__store_ids(counter, evsel_list)) return -1; } if (perf_evlist__apply_filters(evsel_list, &counter)) { pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n", - counter->filter, perf_evsel__name(counter), errno, + counter->filter, evsel__name(counter), errno, str_error_r(errno, msg, sizeof(msg))); return -1; } @@ -686,8 +685,11 @@ try_again_reset: break; } } - if (child_pid != -1) + if (child_pid != -1) { + if (timeout) + kill(child_pid, SIGTERM); wait4(child_pid, &status, 0, &stat_config.ru_data); + } if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 9e84fae9b096..c76f84b174c4 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -579,8 +579,8 @@ process_sample_cpu_idle(struct timechart *tchart __maybe_unused, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u32 state = perf_evsel__intval(evsel, sample, "state"); - u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); + u32 state = evsel__intval(evsel, sample, "state"); + u32 cpu_id = evsel__intval(evsel, sample, "cpu_id"); if (state == (u32)PWR_EVENT_EXIT) c_state_end(tchart, cpu_id, sample->time); @@ -595,8 +595,8 @@ process_sample_cpu_frequency(struct timechart *tchart, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u32 state = perf_evsel__intval(evsel, sample, "state"); - u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); + u32 state = evsel__intval(evsel, sample, "state"); + u32 cpu_id = evsel__intval(evsel, sample, "cpu_id"); p_state_change(tchart, cpu_id, sample->time, state); return 0; @@ -608,9 +608,9 @@ process_sample_sched_wakeup(struct timechart *tchart, struct perf_sample *sample, const char *backtrace) { - u8 flags = perf_evsel__intval(evsel, sample, "common_flags"); - int waker = perf_evsel__intval(evsel, sample, "common_pid"); - int wakee = perf_evsel__intval(evsel, sample, "pid"); + u8 flags = evsel__intval(evsel, sample, "common_flags"); + int waker = evsel__intval(evsel, sample, "common_pid"); + int wakee = evsel__intval(evsel, sample, "pid"); sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace); return 0; @@ -622,9 +622,9 @@ process_sample_sched_switch(struct timechart *tchart, struct perf_sample *sample, const char *backtrace) { - int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"); - int next_pid = perf_evsel__intval(evsel, sample, "next_pid"); - u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state"); + int prev_pid = evsel__intval(evsel, sample, "prev_pid"); + int next_pid = evsel__intval(evsel, sample, "next_pid"); + u64 prev_state = evsel__intval(evsel, sample, "prev_state"); sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid, prev_state, backtrace); @@ -638,8 +638,8 @@ process_sample_power_start(struct timechart *tchart __maybe_unused, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); - u64 value = perf_evsel__intval(evsel, sample, "value"); + u64 cpu_id = evsel__intval(evsel, sample, "cpu_id"); + u64 value = evsel__intval(evsel, sample, "value"); c_state_start(cpu_id, sample->time, value); return 0; @@ -661,8 +661,8 @@ process_sample_power_frequency(struct timechart *tchart, struct perf_sample *sample, const char *backtrace __maybe_unused) { - u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id"); - u64 value = perf_evsel__intval(evsel, sample, "value"); + u64 cpu_id = evsel__intval(evsel, sample, "cpu_id"); + u64 value = evsel__intval(evsel, sample, "value"); p_state_change(tchart, cpu_id, sample->time, value); return 0; @@ -843,7 +843,7 @@ process_enter_read(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ, sample->time, fd); } @@ -853,7 +853,7 @@ process_exit_read(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ, sample->time, ret); } @@ -863,7 +863,7 @@ process_enter_write(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE, sample->time, fd); } @@ -873,7 +873,7 @@ process_exit_write(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE, sample->time, ret); } @@ -883,7 +883,7 @@ process_enter_sync(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC, sample->time, fd); } @@ -893,7 +893,7 @@ process_exit_sync(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC, sample->time, ret); } @@ -903,7 +903,7 @@ process_enter_tx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX, sample->time, fd); } @@ -913,7 +913,7 @@ process_exit_tx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX, sample->time, ret); } @@ -923,7 +923,7 @@ process_enter_rx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX, sample->time, fd); } @@ -933,7 +933,7 @@ process_exit_rx(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX, sample->time, ret); } @@ -943,7 +943,7 @@ process_enter_poll(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long fd = perf_evsel__intval(evsel, sample, "fd"); + long fd = evsel__intval(evsel, sample, "fd"); return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL, sample->time, fd); } @@ -953,7 +953,7 @@ process_exit_poll(struct timechart *tchart, struct evsel *evsel, struct perf_sample *sample) { - long ret = perf_evsel__intval(evsel, sample, "ret"); + long ret = evsel__intval(evsel, sample, "ret"); return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL, sample->time, ret); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 289cf83e658a..372c38254654 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -33,6 +33,7 @@ #include "util/map.h" #include "util/mmap.h" #include "util/session.h" +#include "util/thread.h" #include "util/symbol.h" #include "util/synthetic-events.h" #include "util/top.h" @@ -254,7 +255,7 @@ static void perf_top__show_details(struct perf_top *top) if (notes->src == NULL) goto out_unlock; - printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); + printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name); printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts); @@ -297,8 +298,7 @@ static void perf_top__resort_hists(struct perf_top *t) hists__collapse_resort(hists, NULL); /* Non-group events are considered as leader */ - if (symbol_conf.event_group && - !perf_evsel__is_group_leader(pos)) { + if (symbol_conf.event_group && !evsel__is_group_leader(pos)) { struct hists *leader_hists = evsel__hists(pos->leader); hists__match(leader_hists, hists); @@ -441,7 +441,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries); if (top->evlist->core.nr_entries > 1) - fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel)); + fprintf(stdout, "\t[E] active event counter. \t(%s)\n", evsel__name(top->sym_evsel)); fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); @@ -528,13 +528,13 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) fprintf(stderr, "\nAvailable events:"); evlist__for_each_entry(top->evlist, top->sym_evsel) - fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel)); + fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, evsel__name(top->sym_evsel)); prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->core.nr_entries) { top->sym_evsel = evlist__first(top->evlist); - fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); + fprintf(stderr, "Sorry, no such event, using %s.\n", evsel__name(top->sym_evsel)); sleep(1); break; } @@ -775,6 +775,9 @@ static void perf_event__process_sample(struct perf_tool *tool, if (machine__resolve(machine, &al, sample) < 0) return; + if (top->stitch_lbr) + al.thread->lbr_stitch_enable = true; + if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { @@ -1042,14 +1045,13 @@ try_again: perf_top_overwrite_fallback(top, counter)) goto try_again; - if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (evsel__fallback(counter, errno, msg, sizeof(msg))) { if (verbose > 0) ui__warning("%s\n", msg); goto try_again; } - perf_evsel__open_strerror(counter, &opts->target, - errno, msg, sizeof(msg)); + evsel__open_strerror(counter, &opts->target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); goto out_err; } @@ -1571,10 +1573,11 @@ int cmd_top(int argc, const char **argv) "Sort the output by the event at the index n in group. " "If n is invalid, sort by the first event. " "WARNING: should be used on grouped events."), + OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr, + "Enable LBR callgraph stitching approach"), OPTS_EVSWITCH(&top.evswitch), OPT_END() }; - struct evlist *sb_evlist = NULL; const char * const top_usage[] = { "perf top [<options>]", NULL @@ -1640,6 +1643,11 @@ int cmd_top(int argc, const char **argv) } } + if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) { + pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n"); + goto out_delete_evlist; + } + if (opts->branch_stack && callchain_param.enabled) symbol_conf.show_branchflag_count = true; @@ -1732,10 +1740,21 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - if (!top.record_opts.no_bpf_event) - bpf_event__add_sb_event(&sb_evlist, &perf_env); + if (!top.record_opts.no_bpf_event) { + top.sb_evlist = evlist__new(); + + if (top.sb_evlist == NULL) { + pr_err("Couldn't create side band evlist.\n."); + goto out_delete_evlist; + } + + if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) { + pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n."); + goto out_delete_evlist; + } + } - if (perf_evlist__start_sb_thread(sb_evlist, target)) { + if (perf_evlist__start_sb_thread(top.sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; } @@ -1743,7 +1762,7 @@ int cmd_top(int argc, const char **argv) status = __cmd_top(&top); if (!opts->no_bpf_event) - perf_evlist__stop_sb_thread(sb_evlist); + perf_evlist__stop_sb_thread(top.sb_evlist); out_delete_evlist: evlist__delete(top.evlist); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 01d542007c8b..a46efb907bd4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -366,11 +366,9 @@ out_delete: return NULL; } -static int perf_evsel__init_tp_uint_field(struct evsel *evsel, - struct tp_field *field, - const char *name) +static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name) { - struct tep_format_field *format_field = perf_evsel__field(evsel, name); + struct tep_format_field *format_field = evsel__field(evsel, name); if (format_field == NULL) return -1; @@ -380,13 +378,11 @@ static int perf_evsel__init_tp_uint_field(struct evsel *evsel, #define perf_evsel__init_sc_tp_uint_field(evsel, name) \ ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\ - perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); }) + evsel__init_tp_uint_field(evsel, &sc->name, #name); }) -static int perf_evsel__init_tp_ptr_field(struct evsel *evsel, - struct tp_field *field, - const char *name) +static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name) { - struct tep_format_field *format_field = perf_evsel__field(evsel, name); + struct tep_format_field *format_field = evsel__field(evsel, name); if (format_field == NULL) return -1; @@ -396,7 +392,7 @@ static int perf_evsel__init_tp_ptr_field(struct evsel *evsel, #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \ ({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\ - perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) + evsel__init_tp_ptr_field(evsel, &sc->name, #name); }) static void evsel__delete_priv(struct evsel *evsel) { @@ -404,13 +400,13 @@ static void evsel__delete_priv(struct evsel *evsel) evsel__delete(evsel); } -static int perf_evsel__init_syscall_tp(struct evsel *evsel) +static int evsel__init_syscall_tp(struct evsel *evsel) { struct syscall_tp *sc = evsel__syscall_tp(evsel); if (sc != NULL) { - if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && - perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr")) + if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") && + evsel__init_tp_uint_field(evsel, &sc->id, "nr")) return -ENOENT; return 0; } @@ -418,14 +414,14 @@ static int perf_evsel__init_syscall_tp(struct evsel *evsel) return -ENOMEM; } -static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp) +static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp) { struct syscall_tp *sc = evsel__syscall_tp(evsel); if (sc != NULL) { - struct tep_format_field *syscall_id = perf_evsel__field(tp, "id"); + struct tep_format_field *syscall_id = evsel__field(tp, "id"); if (syscall_id == NULL) - syscall_id = perf_evsel__field(tp, "__syscall_nr"); + syscall_id = evsel__field(tp, "__syscall_nr"); if (syscall_id == NULL || __tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap)) return -EINVAL; @@ -436,21 +432,21 @@ static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evs return -ENOMEM; } -static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel) +static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)); } -static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel) +static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap); } -static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) +static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) { if (evsel__syscall_tp(evsel) != NULL) { if (perf_evsel__init_sc_tp_uint_field(evsel, id)) @@ -474,7 +470,7 @@ static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void * if (IS_ERR(evsel)) return NULL; - if (perf_evsel__init_raw_syscall_tp(evsel, handler)) + if (evsel__init_raw_syscall_tp(evsel, handler)) goto out_delete; return evsel; @@ -1801,7 +1797,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) return syscall__set_arg_fmts(sc); } -static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel) +static int evsel__init_tp_arg_scnprintf(struct evsel *evsel) { struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); @@ -2074,7 +2070,7 @@ static struct syscall *trace__syscall_info(struct trace *trace, if (verbose > 1) { static u64 n; fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n", - id, perf_evsel__name(evsel), ++n); + id, evsel__name(evsel), ++n); } return NULL; } @@ -2206,7 +2202,7 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel, double ts = (double)sample->time / NSEC_PER_MSEC; printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", - perf_evsel__name(evsel), ts, + evsel__name(evsel), ts, thread__comm_str(thread), sample->pid, sample->tid, sample->cpu); } @@ -2382,7 +2378,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam static const char *errno_to_name(struct evsel *evsel, int err) { - struct perf_env *env = perf_evsel__env(evsel); + struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); return arch_syscalls__strerrno(arch_name, err); @@ -2513,7 +2509,7 @@ errno_print: { if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel)); out: ttrace->entry_pending = false; err = 0; @@ -2531,7 +2527,7 @@ static int trace__vfs_getname(struct trace *trace, struct evsel *evsel, size_t filename_len, entry_str_len, to_move; ssize_t remaining_space; char *pos; - const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); + const char *filename = evsel__rawptr(evsel, sample, "pathname"); if (!thread) goto out; @@ -2587,7 +2583,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { - u64 runtime = perf_evsel__intval(evsel, sample, "runtime"); + u64 runtime = evsel__intval(evsel, sample, "runtime"); double runtime_ms = (double)runtime / NSEC_PER_MSEC; struct thread *thread = machine__findnew_thread(trace->host, sample->pid, @@ -2606,10 +2602,10 @@ out_put: out_dump: fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n", evsel->name, - perf_evsel__strval(evsel, sample, "comm"), - (pid_t)perf_evsel__intval(evsel, sample, "pid"), + evsel__strval(evsel, sample, "comm"), + (pid_t)evsel__intval(evsel, sample, "pid"), runtime, - perf_evsel__intval(evsel, sample, "vruntime")); + evsel__intval(evsel, sample, "vruntime")); goto out_put; } @@ -2774,7 +2770,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, fprintf(trace->output, "%s(", evsel->name); - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); } else if (evsel->tp_format) { if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || @@ -2795,7 +2791,7 @@ newline: if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel)); ++trace->nr_events_printed; @@ -2890,7 +2886,7 @@ static int trace__pgfault(struct trace *trace, if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) - pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel)); ++trace->nr_events_printed; out: @@ -3032,10 +3028,10 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) } evlist__for_each_entry_safe(evlist, evsel, tmp) { - if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname")) + if (!strstarts(evsel__name(evsel), "probe:vfs_getname")) continue; - if (perf_evsel__field(evsel, "pathname")) { + if (evsel__field(evsel, "pathname")) { evsel->handler = trace__vfs_getname; found = true; continue; @@ -3093,7 +3089,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && sample->raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample->tid, + evsel__name(evsel), sample->tid, sample->cpu, sample->raw_size); } else { tracepoint_handler handler = evsel->handler; @@ -3124,8 +3120,8 @@ static int trace__add_syscall_newtp(struct trace *trace) if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) goto out_delete_sys_exit; - perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); - perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); + evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); + evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); evlist__add(evlist, sys_enter); evlist__add(evlist, sys_exit); @@ -3164,10 +3160,9 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace) if (filter == NULL) goto out_enomem; - if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter, - filter)) { + if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) { sys_exit = trace->syscalls.events.sys_exit; - err = perf_evsel__append_tp_filter(sys_exit, filter); + err = evsel__append_tp_filter(sys_exit, filter); } free(filter); @@ -3695,7 +3690,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe, return __trace__deliver_event(trace, event->event); } -static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg) +static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg) { struct tep_format_field *field; struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel); @@ -3750,7 +3745,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel scnprintf(arg, sizeof(arg), "%.*s", left_size, left); - fmt = perf_evsel__syscall_arg_fmt(evsel, arg); + fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg); if (fmt == NULL) { pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n", arg, evsel->name, evsel->filter); @@ -3801,7 +3796,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel if (new_filter != evsel->filter) { pr_debug("New filter for %s: %s\n", evsel->name, new_filter); - perf_evsel__set_filter(evsel, new_filter); + evsel__set_filter(evsel, new_filter); free(new_filter); } @@ -3849,7 +3844,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); if (pgfault_maj == NULL) goto out_error_mem; - perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); + evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); evlist__add(evlist, pgfault_maj); } @@ -3857,7 +3852,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); if (pgfault_min == NULL) goto out_error_mem; - perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); + evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); evlist__add(evlist, pgfault_min); } @@ -4108,7 +4103,7 @@ out_error: out_error_apply_filters: fprintf(trace->output, "Failed to set filter \"%s\" on event %s with %d (%s)\n", - evsel->filter, perf_evsel__name(evsel), errno, + evsel->filter, evsel__name(evsel), errno, str_error_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; } @@ -4179,7 +4174,7 @@ static int trace__replay(struct trace *trace) "syscalls:sys_enter"); if (evsel && - (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || + (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || perf_evsel__init_sc_tp_ptr_field(evsel, args))) { pr_err("Error during initialize raw_syscalls:sys_enter event\n"); goto out; @@ -4191,7 +4186,7 @@ static int trace__replay(struct trace *trace) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && - (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || + (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || perf_evsel__init_sc_tp_uint_field(evsel, ret))) { pr_err("Error during initialize raw_syscalls:sys_exit event\n"); goto out; @@ -4471,11 +4466,11 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist) continue; if (strcmp(evsel->tp_format->system, "syscalls")) { - perf_evsel__init_tp_arg_scnprintf(evsel); + evsel__init_tp_arg_scnprintf(evsel); continue; } - if (perf_evsel__init_syscall_tp(evsel)) + if (evsel__init_syscall_tp(evsel)) return -1; if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) { @@ -4989,7 +4984,7 @@ int cmd_trace(int argc, const char **argv) */ if (trace.syscalls.events.augmented) { evlist__for_each_entry(trace.evlist, evsel) { - bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0; if (raw_syscalls_sys_exit) { trace.raw_augmented_syscalls = true; @@ -4997,10 +4992,10 @@ int cmd_trace(int argc, const char **argv) } if (trace.syscalls.events.augmented->priv == NULL && - strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) { + strstr(evsel__name(evsel), "syscalls:sys_enter")) { struct evsel *augmented = trace.syscalls.events.augmented; - if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || - perf_evsel__init_augmented_syscall_tp_args(augmented)) + if (evsel__init_augmented_syscall_tp(augmented, evsel) || + evsel__init_augmented_syscall_tp_args(augmented)) goto out; /* * Augmented is __augmented_syscalls__ BPF_OUTPUT event @@ -5014,16 +5009,16 @@ int cmd_trace(int argc, const char **argv) * as not to filter it, then we'll handle it just like we would * for the BPF_OUTPUT one: */ - if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) || - perf_evsel__init_augmented_syscall_tp_args(evsel)) + if (evsel__init_augmented_syscall_tp(evsel, evsel) || + evsel__init_augmented_syscall_tp_args(evsel)) goto out; evsel->handler = trace__sys_enter; } - if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { + if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) { struct syscall_tp *sc; init_augmented_syscall_tp: - if (perf_evsel__init_augmented_syscall_tp(evsel, evsel)) + if (evsel__init_augmented_syscall_tp(evsel, evsel)) goto out; sc = __evsel__syscall_tp(evsel); /* @@ -5047,7 +5042,7 @@ init_augmented_syscall_tp: */ if (trace.raw_augmented_syscalls) trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset; - perf_evsel__init_augmented_syscall_tp_ret(evsel); + evsel__init_augmented_syscall_tp_ret(evsel); evsel->handler = trace__sys_exit; } } diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 0453ba26cdbd..a42fab308ff6 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -258,7 +258,8 @@ gets schedule to. Per task counters can be created by any user, for their own tasks. A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts -all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. +all events on CPU-x. Per CPU counters need CAP_PERFMON or CAP_SYS_ADMIN +privilege. The 'flags' parameter is currently unused and must be zero. diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json new file mode 100644 index 000000000000..c121e526442a --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json @@ -0,0 +1,19 @@ +[ + { + "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)", + "MetricName": "Memory_RD_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB" + }, + { + "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )", + "MetricName": "Memory_WR_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB" + }, + { + "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )", + "MetricName": "PowerBUS_Frequency", + "ScaleUnit": "2.5e-7GHz" + } +] diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 53e76d5d5b37..c8f306b572f4 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -26,7 +26,7 @@ struct pmu_event { * Map a CPU to its table of PMU events. The CPU is identified by the * cpuid field, which is an arch-specific identifier for the CPU. * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile - * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c) + * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c) * * The cpuid can contain any character other than the comma. */ diff --git a/tools/perf/scripts/python/bin/flamegraph-record b/tools/perf/scripts/python/bin/flamegraph-record new file mode 100755 index 000000000000..7df5a19c0163 --- /dev/null +++ b/tools/perf/scripts/python/bin/flamegraph-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -g "$@" diff --git a/tools/perf/scripts/python/bin/flamegraph-report b/tools/perf/scripts/python/bin/flamegraph-report new file mode 100755 index 000000000000..53c5dc90c87e --- /dev/null +++ b/tools/perf/scripts/python/bin/flamegraph-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: create flame graphs +perf script -s "$PERF_EXEC_PATH"/scripts/python/flamegraph.py -- "$@" diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py new file mode 100755 index 000000000000..61f3be9add6b --- /dev/null +++ b/tools/perf/scripts/python/flamegraph.py @@ -0,0 +1,124 @@ +# flamegraph.py - create flame graphs from perf samples +# SPDX-License-Identifier: GPL-2.0 +# +# Usage: +# +# perf record -a -g -F 99 sleep 60 +# perf script report flamegraph +# +# Combined: +# +# perf script flamegraph -a -F 99 sleep 60 +# +# Written by Andreas Gerstmayr <agerstmayr@redhat.com> +# Flame Graphs invented by Brendan Gregg <bgregg@netflix.com> +# Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com> + +from __future__ import print_function +import sys +import os +import argparse +import json + + +class Node: + def __init__(self, name, libtype=""): + self.name = name + self.libtype = libtype + self.value = 0 + self.children = [] + + def toJSON(self): + return { + "n": self.name, + "l": self.libtype, + "v": self.value, + "c": self.children + } + + +class FlameGraphCLI: + def __init__(self, args): + self.args = args + self.stack = Node("root") + + if self.args.format == "html" and \ + not os.path.isfile(self.args.template): + print("Flame Graph template {} does not exist. Please install " + "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) " + "package, specify an existing flame graph template " + "(--template PATH) or another output format " + "(--format FORMAT).".format(self.args.template), + file=sys.stderr) + sys.exit(1) + + def find_or_create_node(self, node, name, dso): + libtype = "kernel" if dso == "[kernel.kallsyms]" else "" + if name is None: + name = "[unknown]" + + for child in node.children: + if child.name == name and child.libtype == libtype: + return child + + child = Node(name, libtype) + node.children.append(child) + return child + + def process_event(self, event): + node = self.find_or_create_node(self.stack, event["comm"], None) + if "callchain" in event: + for entry in reversed(event['callchain']): + node = self.find_or_create_node( + node, entry.get("sym", {}).get("name"), event.get("dso")) + else: + node = self.find_or_create_node( + node, entry.get("symbol"), event.get("dso")) + node.value += 1 + + def trace_end(self): + json_str = json.dumps(self.stack, default=lambda x: x.toJSON()) + + if self.args.format == "html": + try: + with open(self.args.template) as f: + output_str = f.read().replace("/** @flamegraph_json **/", + json_str) + except IOError as e: + print("Error reading template file: {}".format(e), file=sys.stderr) + sys.exit(1) + output_fn = self.args.output or "flamegraph.html" + else: + output_str = json_str + output_fn = self.args.output or "stacks.json" + + if output_fn == "-": + sys.stdout.write(output_str) + else: + print("dumping data to {}".format(output_fn)) + try: + with open(output_fn, "w") as out: + out.write(output_str) + except IOError as e: + print("Error writing output file: {}".format(e), file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Create flame graphs.") + parser.add_argument("-f", "--format", + default="html", choices=["json", "html"], + help="output file format") + parser.add_argument("-o", "--output", + help="output file name") + parser.add_argument("--template", + default="/usr/share/d3-flame-graph/d3-flamegraph-base.html", + help="path to flamegraph HTML template") + parser.add_argument("-i", "--input", + help=argparse.SUPPRESS) + + args = parser.parse_args() + cli = FlameGraphCLI(args) + + process_event = cli.process_event + trace_end = cli.trace_end diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index b3d1bf13ca07..c75557aeef0e 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -56,6 +56,7 @@ perf-y += mem2node.o perf-y += maps.o perf-y += time-utils-test.o perf-y += genelf.o +perf-y += api-io.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c new file mode 100644 index 000000000000..2ada86ad6084 --- /dev/null +++ b/tools/perf/tests/api-io.c @@ -0,0 +1,304 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "debug.h" +#include "tests.h" +#include <api/io.h> +#include <linux/kernel.h> + +#define TEMPL "/tmp/perf-test-XXXXXX" + +#define EXPECT_EQUAL(val, expected) \ +do { \ + if (val != expected) { \ + pr_debug("%s:%d: %d != %d\n", \ + __FILE__, __LINE__, val, expected); \ + ret = -1; \ + } \ +} while (0) + +#define EXPECT_EQUAL64(val, expected) \ +do { \ + if (val != expected) { \ + pr_debug("%s:%d: %lld != %lld\n", \ + __FILE__, __LINE__, val, expected); \ + ret = -1; \ + } \ +} while (0) + +static int make_test_file(char path[PATH_MAX], const char *contents) +{ + ssize_t contents_len = strlen(contents); + int fd; + + strcpy(path, TEMPL); + fd = mkstemp(path); + if (fd < 0) { + pr_debug("mkstemp failed"); + return -1; + } + if (write(fd, contents, contents_len) < contents_len) { + pr_debug("short write"); + close(fd); + unlink(path); + return -1; + } + close(fd); + return 0; +} + +static int setup_test(char path[PATH_MAX], const char *contents, + size_t buf_size, struct io *io) +{ + if (make_test_file(path, contents)) + return -1; + + io->fd = open(path, O_RDONLY); + if (io->fd < 0) { + pr_debug("Failed to open '%s'\n", path); + unlink(path); + return -1; + } + io->buf = malloc(buf_size); + if (io->buf == NULL) { + pr_debug("Failed to allocate memory"); + close(io->fd); + unlink(path); + return -1; + } + io__init(io, io->fd, io->buf, buf_size); + return 0; +} + +static void cleanup_test(char path[PATH_MAX], struct io *io) +{ + free(io->buf); + close(io->fd); + unlink(path); +} + +static int do_test_get_char(const char *test_string, size_t buf_size) +{ + char path[PATH_MAX]; + struct io io; + int ch, ret = 0; + size_t i; + + if (setup_test(path, test_string, buf_size, &io)) + return -1; + + for (i = 0; i < strlen(test_string); i++) { + ch = io__get_char(&io); + + EXPECT_EQUAL(ch, test_string[i]); + EXPECT_EQUAL(io.eof, false); + } + ch = io__get_char(&io); + EXPECT_EQUAL(ch, -1); + EXPECT_EQUAL(io.eof, true); + + cleanup_test(path, &io); + return ret; +} + +static int test_get_char(void) +{ + int i, ret = 0; + size_t j; + + static const char *const test_strings[] = { + "12345678abcdef90", + "a\nb\nc\nd\n", + "\a\b\t\v\f\r", + }; + for (i = 0; i <= 10; i++) { + for (j = 0; j < ARRAY_SIZE(test_strings); j++) { + if (do_test_get_char(test_strings[j], 1 << i)) + ret = -1; + } + } + return ret; +} + +static int do_test_get_hex(const char *test_string, + __u64 val1, int ch1, + __u64 val2, int ch2, + __u64 val3, int ch3, + bool end_eof) +{ + char path[PATH_MAX]; + struct io io; + int ch, ret = 0; + __u64 hex; + + if (setup_test(path, test_string, 4, &io)) + return -1; + + ch = io__get_hex(&io, &hex); + EXPECT_EQUAL64(hex, val1); + EXPECT_EQUAL(ch, ch1); + + ch = io__get_hex(&io, &hex); + EXPECT_EQUAL64(hex, val2); + EXPECT_EQUAL(ch, ch2); + + ch = io__get_hex(&io, &hex); + EXPECT_EQUAL64(hex, val3); + EXPECT_EQUAL(ch, ch3); + + EXPECT_EQUAL(io.eof, end_eof); + + cleanup_test(path, &io); + return ret; +} + +static int test_get_hex(void) +{ + int ret = 0; + + if (do_test_get_hex("12345678abcdef90", + 0x12345678abcdef90, -1, + 0, -1, + 0, -1, + true)) + ret = -1; + + if (do_test_get_hex("1\n2\n3\n", + 1, '\n', + 2, '\n', + 3, '\n', + false)) + ret = -1; + + if (do_test_get_hex("12345678ABCDEF90;a;b", + 0x12345678abcdef90, ';', + 0xa, ';', + 0xb, -1, + true)) + ret = -1; + + if (do_test_get_hex("0x1x2x", + 0, 'x', + 1, 'x', + 2, 'x', + false)) + ret = -1; + + if (do_test_get_hex("x1x", + 0, -2, + 1, 'x', + 0, -1, + true)) + ret = -1; + + if (do_test_get_hex("10000000000000000000000000000abcdefgh99i", + 0xabcdef, 'g', + 0, -2, + 0x99, 'i', + false)) + ret = -1; + + return ret; +} + +static int do_test_get_dec(const char *test_string, + __u64 val1, int ch1, + __u64 val2, int ch2, + __u64 val3, int ch3, + bool end_eof) +{ + char path[PATH_MAX]; + struct io io; + int ch, ret = 0; + __u64 dec; + + if (setup_test(path, test_string, 4, &io)) + return -1; + + ch = io__get_dec(&io, &dec); + EXPECT_EQUAL64(dec, val1); + EXPECT_EQUAL(ch, ch1); + + ch = io__get_dec(&io, &dec); + EXPECT_EQUAL64(dec, val2); + EXPECT_EQUAL(ch, ch2); + + ch = io__get_dec(&io, &dec); + EXPECT_EQUAL64(dec, val3); + EXPECT_EQUAL(ch, ch3); + + EXPECT_EQUAL(io.eof, end_eof); + + cleanup_test(path, &io); + return ret; +} + +static int test_get_dec(void) +{ + int ret = 0; + + if (do_test_get_dec("12345678abcdef90", + 12345678, 'a', + 0, -2, + 0, -2, + false)) + ret = -1; + + if (do_test_get_dec("1\n2\n3\n", + 1, '\n', + 2, '\n', + 3, '\n', + false)) + ret = -1; + + if (do_test_get_dec("12345678;1;2", + 12345678, ';', + 1, ';', + 2, -1, + true)) + ret = -1; + + if (do_test_get_dec("0x1x2x", + 0, 'x', + 1, 'x', + 2, 'x', + false)) + ret = -1; + + if (do_test_get_dec("x1x", + 0, -2, + 1, 'x', + 0, -1, + true)) + ret = -1; + + if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c", + 123456789, 'a', + 0, -2, + 99, 'c', + false)) + ret = -1; + + return ret; +} + +int test__api_io(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + int ret = 0; + + if (test_get_char()) + ret = TEST_FAIL; + if (test_get_hex()) + ret = TEST_FAIL; + if (test_get_dec()) + ret = TEST_FAIL; + return ret; +} diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index b6322eb0f423..3471ec52ea11 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -310,6 +310,10 @@ static struct test generic_tests[] = { .func = test__jit_write_elf, }, { + .desc = "Test api io", + .func = test__api_io, + }, + { .desc = "maps__merge_in", .func = test__maps__merge_in, }, diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index 1e8a9f5c356d..db68894a6f40 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -72,7 +72,7 @@ static int attach__current_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; - err = perf_evsel__open_per_thread(evsel, threads); + err = evsel__open_per_thread(evsel, threads); if (err) { pr_debug("Failed to open event cpu-clock:u\n"); return err; @@ -96,7 +96,7 @@ static int attach__current_enabled(struct evlist *evlist) return -1; } - err = perf_evsel__open_per_thread(evsel, threads); + err = evsel__open_per_thread(evsel, threads); perf_thread_map__put(threads); return err == 0 ? TEST_OK : TEST_FAIL; @@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; - err = perf_evsel__open_per_cpu(evsel, cpus, -1); + err = evsel__open_per_cpu(evsel, cpus, -1); if (err) { if (err == -EACCES) return TEST_SKIP; @@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist) return -1; } - err = perf_evsel__open_per_cpu(evsel, cpus, -1); + err = evsel__open_per_cpu(evsel, cpus, -1); if (err == -EACCES) return TEST_SKIP; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index c727379cf20e..bdcf032f8516 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -109,7 +109,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu TEST_ASSERT_VAL("failed to synthesize attr update scale", !perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale)); - tmp.name = perf_evsel__name(evsel); + tmp.name = evsel__name(evsel); TEST_ASSERT_VAL("failed to synthesize attr update name", !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name)); diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 956205bf9326..61ecd8e33a01 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -20,12 +20,11 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); err = parse_events(evlist, name, NULL); if (err) ret = err; @@ -39,23 +38,22 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (evsel->idx != idx) continue; ++idx; - if (strcmp(perf_evsel__name(evsel), name)) { - pr_debug("%s != %s\n", perf_evsel__name(evsel), name); + if (strcmp(evsel__name(evsel), name)) { + pr_debug("%s != %s\n", evsel__name(evsel), name); ret = -1; } - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); } } } @@ -84,9 +82,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) err = 0; evlist__for_each_entry(evlist, evsel) { - if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) { + if (strcmp(evsel__name(evsel), names[evsel->idx])) { --err; - pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]); + pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]); } } diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 261e6eaaee99..ce8aa32bc3ee 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -8,7 +8,7 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name, int size, bool should_be_signed) { - struct tep_format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = evsel__field(evsel, name); int is_signed; int ret = 0; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 28313e59d6f6..f9e8e5628836 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -6,11 +6,11 @@ #include <string.h> #include <linux/zalloc.h> -static int test(struct parse_ctx *ctx, const char *e, double val2) +static int test(struct expr_parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, e)) + if (expr__parse(&val, ctx, e, 1)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -22,7 +22,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char **other; double val; int i, ret; - struct parse_ctx ctx; + struct expr_parse_ctx ctx; int num_other; expr__ctx_init(&ctx); @@ -44,21 +44,29 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) return ret; p = "FOO/0"; - ret = expr__parse(&val, &ctx, p); + ret = expr__parse(&val, &ctx, p, 1); TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, &ctx, p); + ret = expr__parse(&val, &ctx, p, 1); TEST_ASSERT_VAL("missing operand", ret == -1); TEST_ASSERT_VAL("find other", - expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); + expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other, 1) == 0); TEST_ASSERT_VAL("find other", num_other == 3); TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR")); TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); TEST_ASSERT_VAL("find other", other[3] == NULL); + TEST_ASSERT_VAL("find other", + expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL, + &other, &num_other, 3) == 0); + TEST_ASSERT_VAL("find other", num_other == 2); + TEST_ASSERT_VAL("find other", !strcmp(other[0], "EVENT1,param=3/")); + TEST_ASSERT_VAL("find other", !strcmp(other[1], "EVENT2,param=3/")); + TEST_ASSERT_VAL("find other", other[2] == NULL); + for (i = 0; i < num_other; i++) zfree(&other[i]); free((void *)other); diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 6367c8f6ca22..7a542f1c1c78 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -280,7 +280,7 @@ static int test1(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = false; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -427,7 +427,7 @@ static int test2(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -485,7 +485,7 @@ static int test3(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); callchain_register_param(&callchain_param); @@ -669,7 +669,7 @@ static int test4(struct evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(NULL); diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5f4c0dbb4715..d4b8eb6e337a 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -86,7 +86,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse } evsels[i]->core.attr.wakeup_events = 1; - perf_evsel__set_sample_id(evsels[i], false); + evsel__set_sample_id(evsels[i], false); evlist__add(evlist, evsels[i]); @@ -150,7 +150,7 @@ out_init: if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) { pr_debug("expected %d %s events, got %d\n", expected_nr_events[evsel->idx], - perf_evsel__name(evsel), nr_events[evsel->idx]); + evsel__name(evsel), nr_events[evsel->idx]); err = -1; goto out_delete_evlist; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 93c176523e38..900934be22d2 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -103,15 +103,15 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int if (cpus->map[cpu] >= CPU_SETSIZE) continue; - if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { - pr_debug("perf_evsel__read_on_cpu\n"); + if (evsel__read_on_cpu(evsel, cpu, 0) < 0) { + pr_debug("evsel__read_on_cpu\n"); err = -1; break; } expected = nr_openat_calls + cpu; if (perf_counts(evsel->counts, cpu, 0)->val != expected) { - pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", + pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index c6b2d7aab608..1dc2897d2df9 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -60,7 +60,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_delete_evlist; } - perf_evsel__config(evsel, &opts, NULL); + evsel__config(evsel, &opts, NULL); perf_thread_map__set_pid(evlist->core.threads, 0, getpid()); @@ -108,13 +108,13 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest continue; } - err = perf_evsel__parse_sample(evsel, event, &sample); + err = evsel__parse_sample(evsel, event, &sample); if (err) { pr_debug("Can't parse sample, err = %d\n", err); goto out_delete_evlist; } - tp_flags = perf_evsel__intval(evsel, &sample, "flags"); + tp_flags = evsel__intval(evsel, &sample, "flags"); if (flags != tp_flags) { pr_debug("%s: Expected flags=%#x, got %#x\n", diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 5ebffae18605..db5d8bb8cd06 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -34,7 +34,7 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads) < 0) { + if (evsel__open_per_thread(evsel, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", str_error_r(errno, sbuf, sizeof(sbuf))); @@ -46,13 +46,13 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m close(fd); } - if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) { - pr_debug("perf_evsel__read_on_cpu\n"); + if (evsel__read_on_cpu(evsel, 0, 0) < 0) { + pr_debug("evsel__read_on_cpu\n"); goto out_close_fd; } if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { - pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", + pr_debug("evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); goto out_close_fd; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 091c3aeccc27..895188b63f96 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -371,7 +371,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:u")); + !strcmp(evsel__name(evsel), "mem:0:u")); return test__checkevent_breakpoint(evlist); } @@ -385,7 +385,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:x:k")); + !strcmp(evsel__name(evsel), "mem:0:x:k")); return test__checkevent_breakpoint_x(evlist); } @@ -399,7 +399,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:r:hp")); + !strcmp(evsel__name(evsel), "mem:0:r:hp")); return test__checkevent_breakpoint_r(evlist); } @@ -413,7 +413,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:w:up")); + !strcmp(evsel__name(evsel), "mem:0:w:up")); return test__checkevent_breakpoint_w(evlist); } @@ -427,7 +427,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp")); + !strcmp(evsel__name(evsel), "mem:0:rw:kp")); return test__checkevent_breakpoint_rw(evlist); } @@ -468,7 +468,7 @@ static int test__checkevent_list(struct evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); /* syscalls:sys_enter_openat:k */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type); @@ -479,7 +479,7 @@ static int test__checkevent_list(struct evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); /* 1:1:hp */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); @@ -498,15 +498,15 @@ static int test__checkevent_pmu_name(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config); - TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); + TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava")); /* cpu/config=2/u" */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "cpu/config=2/u")); + !strcmp(evsel__name(evsel), "cpu/config=2/u")); return 0; } @@ -529,7 +529,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist) TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type)); /* cpu/config=2,call-graph=no,time=0,period=2000/ */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config); /* @@ -577,7 +577,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* cpu/pmu-event/u*/ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); TEST_ASSERT_VAL("wrong exclude_user", @@ -652,13 +652,13 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:upp */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -670,7 +670,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -694,13 +694,13 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cache-references + :u modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config); @@ -711,11 +711,11 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:k */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -725,7 +725,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -750,15 +750,15 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong group name", !strcmp(leader->group_name, "group1")); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group1 cycles:kppp */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -771,11 +771,11 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group2 cycles + G modifier */ - evsel = leader = perf_evsel__next(evsel); + evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -785,15 +785,15 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong group name", !strcmp(leader->group_name, "group2")); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* group2 1:3 + G modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); @@ -803,11 +803,11 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:u */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -817,7 +817,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -843,13 +843,13 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:kp + p */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -861,7 +861,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); return 0; @@ -886,13 +886,13 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions + G */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -903,11 +903,11 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* cycles:G */ - evsel = leader = perf_evsel__next(evsel); + evsel = leader = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -918,13 +918,13 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read); /* instructions:G */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config); @@ -935,10 +935,10 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); /* cycles */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); @@ -948,7 +948,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); return 0; } @@ -972,12 +972,12 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:G + :H group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -988,7 +988,7 @@ static int test__group_gh1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1012,12 +1012,12 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:H + :G group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1028,7 +1028,7 @@ static int test__group_gh2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1052,12 +1052,12 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:H + :u group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1068,7 +1068,7 @@ static int test__group_gh3(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1092,12 +1092,12 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); - TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel)); + TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0); /* cache-misses:H + :uG group modifier */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1108,7 +1108,7 @@ static int test__group_gh4(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); - TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1); + TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1); return 0; } @@ -1135,7 +1135,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* cache-misses - not sampling */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); @@ -1149,7 +1149,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* branch-misses - not sampling */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); @@ -1188,7 +1188,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read); /* branch-misses - not sampling */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); @@ -1234,14 +1234,14 @@ static int test__pinned_group(struct evlist *evlist) TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned); /* cache-misses - can not be pinned, but will go on with the leader */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); /* branch-misses - ditto */ - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); TEST_ASSERT_VAL("wrong config", PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); @@ -1356,6 +1356,16 @@ static int test__checkevent_complex_name(struct evlist *evlist) return 0; } +static int test__checkevent_raw_pmu(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + return 0; +} + static int test__sym_event_slash(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -1750,7 +1760,12 @@ static struct evlist_test test__events_pmu[] = { .name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp", .check = test__checkevent_complex_name, .id = 3, - } + }, + { + .name = "software/r1a/", + .check = test__checkevent_raw_pmu, + .id = 4, + }, }; struct terms_test { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 2195fc205e72..83adfd846ccd 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -106,9 +106,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * Config the evsels, setting attr->comm on the first one, etc. */ evsel = evlist__first(evlist); - perf_evsel__set_sample_bit(evsel, CPU); - perf_evsel__set_sample_bit(evsel, TID); - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TID); + evsel__set_sample_bit(evsel, TIME); perf_evlist__config(evlist, &opts, NULL); err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask); diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 61865699c3f4..a0bdaf390ac8 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -296,12 +296,12 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) goto out_free; } - evsel.sample_size = __perf_evsel__sample_size(sample_type); + evsel.sample_size = __evsel__sample_size(sample_type); - err = perf_evsel__parse_sample(&evsel, event, &sample_out); + err = evsel__parse_sample(&evsel, event, &sample_out); if (err) { pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", - "perf_evsel__parse_sample", sample_type, err); + "evsel__parse_sample", sample_type, err); goto out_free; } diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index fcb0d03dba4e..db5e1f70053a 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -135,8 +135,8 @@ static int process_sample_event(struct evlist *evlist, evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == switch_tracking->switch_evsel) { - next_tid = perf_evsel__intval(evsel, &sample, "next_pid"); - prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid"); + next_tid = evsel__intval(evsel, &sample, "next_pid"); + prev_tid = evsel__intval(evsel, &sample, "prev_pid"); cpu = sample.cpu; pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n", cpu, prev_tid, next_tid); @@ -394,8 +394,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ switch_evsel = evlist__last(evlist); - perf_evsel__set_sample_bit(switch_evsel, CPU); - perf_evsel__set_sample_bit(switch_evsel, TIME); + evsel__set_sample_bit(switch_evsel, CPU); + evsel__set_sample_bit(switch_evsel, TIME); switch_evsel->core.system_wide = true; switch_evsel->no_aux_samples = true; @@ -412,8 +412,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out_err; } - perf_evsel__set_sample_bit(cycles_evsel, CPU); - perf_evsel__set_sample_bit(cycles_evsel, TIME); + evsel__set_sample_bit(cycles_evsel, CPU); + evsel__set_sample_bit(cycles_evsel, TIME); /* Fourth event */ err = parse_events(evlist, "dummy:u", NULL); @@ -429,7 +429,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ tracking_evsel->core.attr.freq = 0; tracking_evsel->core.attr.sample_period = 1; - perf_evsel__set_sample_bit(tracking_evsel, TIME); + evsel__set_sample_bit(tracking_evsel, TIME); /* Config events */ perf_evlist__config(evlist, &opts, NULL); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 61a1ab032080..d6d4ac34eeb7 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -112,6 +112,7 @@ int test__mem2node(struct test *t, int subtest); int test__maps__merge_in(struct test *t, int subtest); int test__time_utils(struct test *t, int subtest); int test__jit_write_elf(struct test *test, int subtest); +int test__api_io(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 4a800499d7c3..22daf2bdf5fa 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -33,10 +33,8 @@ static int session_write_header(char *path) { struct perf_session *session; struct perf_data data = { - .file = { - .path = path, - }, - .mode = PERF_DATA_MODE_WRITE, + .path = path, + .mode = PERF_DATA_MODE_WRITE, }; session = perf_session__new(&data, false, NULL); @@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) { struct perf_session *session; struct perf_data data = { - .file = { - .path = path, - }, - .mode = PERF_DATA_MODE_READ, + .path = path, + .mode = PERF_DATA_MODE_READ, }; int i; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 487e54ef56a9..f98a118dfc49 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3416,7 +3416,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, struct hists *hists = evsel__hists(evsel); bool current_entry = ui_browser__is_current_entry(browser, row); unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE]; - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char bf[256], unit; const char *warn = " "; size_t printed; @@ -3424,10 +3424,10 @@ static void perf_evsel_menu__write(struct ui_browser *browser, ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : HE_COLORSET_NORMAL); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - ev_name = perf_evsel__group_name(evsel); + ev_name = evsel__group_name(evsel); for_each_group_member(pos, evsel) { struct hists *pos_hists = evsel__hists(pos); @@ -3512,13 +3512,13 @@ browse_hists: if (pos->core.node.next == &evlist->core.entries) pos = evlist__first(evlist); else - pos = perf_evsel__next(pos); + pos = evsel__next(pos); goto browse_hists; case K_UNTAB: if (pos->core.node.prev == &evlist->core.entries) pos = evlist__last(evlist); else - pos = perf_evsel__prev(pos); + pos = evsel__prev(pos); goto browse_hists; case K_SWITCH_INPUT_DATA: case K_RELOAD: @@ -3554,7 +3554,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused, { struct evsel *evsel = list_entry(entry, struct evsel, core.node); - if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel)) + if (symbol_conf.event_group && !evsel__is_group_leader(evsel)) return true; return false; @@ -3587,7 +3587,7 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist, ui_helpline__push("Press ESC to exit"); evlist__for_each_entry(evlist, pos) { - const char *ev_name = perf_evsel__name(pos); + const char *ev_name = evsel__name(pos); size_t line_len = strlen(ev_name) + 7; if (menu.b.width < line_len) @@ -3622,7 +3622,7 @@ single_entry: nr_entries = 0; evlist__for_each_entry(evlist, pos) { - if (perf_evsel__is_group_leader(pos)) + if (evsel__is_group_leader(pos)) nr_entries++; } @@ -3640,7 +3640,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, size_t size) { struct hists *hists = evsel__hists(browser->block_evsel); - const char *evname = perf_evsel__name(browser->block_evsel); + const char *evname = evsel__name(browser->block_evsel); unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; int ret; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 35f9641bf670..a7dff77f2018 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -130,7 +130,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms, gtk_list_store_append(store, &iter); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { for (i = 0; i < evsel->core.nr_members; i++) { ret += perf_gtk__get_percent(s + ret, sizeof(s) - ret, diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index ed1a97b2c4b0..53ef71a1b15d 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -635,18 +635,18 @@ int perf_evlist__gtk_browse_hists(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - const char *evname = perf_evsel__name(pos); + const char *evname = evsel__name(pos); GtkWidget *scrolled_window; GtkWidget *tab_label; char buf[512]; size_t size = sizeof(buf); if (symbol_conf.event_group) { - if (!perf_evsel__is_group_leader(pos)) + if (!evsel__is_group_leader(pos)) continue; if (pos->core.nr_members > 1) { - perf_evsel__group_desc(pos, buf, size); + evsel__group_desc(pos, buf, size); evname = buf; } } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 025f4c7f96bf..c1f24d004852 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -43,12 +43,12 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, } else ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he)); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { int prev_idx, idx_delta; struct hist_entry *pair; int nr_members = evsel->core.nr_members; - prev_idx = perf_evsel__group_idx(evsel); + prev_idx = evsel__group_idx(evsel); list_for_each_entry(pair, &he->pairs.head, pairs.node) { u64 period = get_field(pair); @@ -58,7 +58,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, continue; evsel = hists_to_evsel(pair->hists); - idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; + idx_delta = evsel__group_idx(evsel) - prev_idx - 1; while (idx_delta--) { /* @@ -82,7 +82,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, len, period); } - prev_idx = perf_evsel__group_idx(evsel); + prev_idx = evsel__group_idx(evsel); } idx_delta = nr_members - prev_idx - 1; @@ -164,12 +164,12 @@ static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b, list_for_each_entry(pair, &a->pairs.head, pairs.node) { struct evsel *evsel = hists_to_evsel(pair->hists); - fa[perf_evsel__group_idx(evsel)] = get_field(pair); + fa[evsel__group_idx(evsel)] = get_field(pair); } list_for_each_entry(pair, &b->pairs.head, pairs.node) { struct evsel *evsel = hists_to_evsel(pair->hists); - fb[perf_evsel__group_idx(evsel)] = get_field(pair); + fb[evsel__group_idx(evsel)] = get_field(pair); } *fields_a = fa; @@ -190,7 +190,7 @@ static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b, int cmp, nr_members, ret, i; cmp = field_cmp(get_field(a), get_field(b)); - if (!perf_evsel__is_group_event(evsel)) + if (!evsel__is_group_event(evsel)) return cmp; nr_members = evsel->core.nr_members; @@ -240,7 +240,7 @@ static int __hpp__sort(struct hist_entry *a, struct hist_entry *b, return ret; evsel = hists_to_evsel(a->hists); - if (!perf_evsel__is_group_event(evsel)) + if (!evsel__is_group_event(evsel)) return ret; nr_members = evsel->core.nr_members; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index c0cf8dff694e..ca07a162d602 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ perf-y += db-export.o perf-y += env.o perf-y += event.o perf-y += evlist.o +perf-y += sideband_evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o perf-y += perf_event_attr_fprintf.o @@ -88,6 +89,7 @@ perf-y += counts.o perf-y += stat.o perf-y += stat-shadow.o perf-y += stat-display.o +perf-y += perf_api_probe.o perf-y += record.o perf-y += srcline.o perf-y += srccode.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f1ea0d61eb5b..d828c2d2edee 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1191,7 +1191,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args) struct disasm_line *dl = NULL; int nr = 1; - if (perf_evsel__is_group_event(args->evsel)) + if (evsel__is_group_event(args->evsel)) nr = args->evsel->core.nr_members; dl = zalloc(disasm_line_size(nr)); @@ -1437,7 +1437,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (queue) return -1; - if (perf_evsel__is_group_event(evsel)) + if (evsel__is_group_event(evsel)) width *= evsel->core.nr_members; if (!*al->line) @@ -1821,6 +1821,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, } #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +static int +symbol__disassemble_bpf_image(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct disasm_line *dl; + + args->offset = -1; + args->line = strdup("to be implemented"); + args->line_nr = 0; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + free(args->line); + return 0; +} + /* * Possibly create a new version of line with tabs expanded. Returns the * existing or new line, storage is updated if a new line is allocated. If @@ -1920,6 +1938,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) { return symbol__disassemble_bpf(sym, args); + } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); } else if (dso__is_kcore(dso)) { kce.kcore_filename = symfs_filename; kce.addr = map__rip_2objdump(map, sym->start); @@ -2136,7 +2156,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, .evsel = evsel, .options = options, }; - struct perf_env *env = perf_evsel__env(evsel); + struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); struct arch *arch; int err; @@ -2324,7 +2344,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, struct dso *dso = map->dso; char *filename; const char *d_filename; - const char *evsel_name = perf_evsel__name(evsel); + const char *evsel_name = evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); struct annotation_line *pos, *queue = NULL; @@ -2348,9 +2368,9 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel, len = symbol__size(sym); - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { width *= evsel->core.nr_members; - perf_evsel__group_desc(evsel, buf, sizeof(buf)); + evsel__group_desc(evsel, buf, sizeof(buf)); evsel_name = buf; } @@ -2485,7 +2505,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *opts) { - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char buf[1024]; char *filename; int err = -1; @@ -2498,8 +2518,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel, if (fp == NULL) goto out_free_filename; - if (perf_evsel__is_group_event(evsel)) { - perf_evsel__group_desc(evsel, buf, sizeof(buf)); + if (evsel__is_group_event(evsel)) { + evsel__group_desc(evsel, buf, sizeof(buf)); ev_name = buf; } @@ -3044,7 +3064,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, if (notes->offsets == NULL) return ENOMEM; - if (perf_evsel__is_group_event(evsel)) + if (evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; err = symbol__annotate(ms, evsel, options, parch); diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 53be12b23ff4..875a0dd540e5 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -176,6 +176,14 @@ static void arm_spe_free(struct perf_session *session) free(spe); } +static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace); + + return evsel->core.attr.type == spe->pmu_type; +} + static const char * const arm_spe_info_fmts[] = { [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", }; @@ -218,6 +226,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.flush_events = arm_spe_flush; spe->auxtrace.free_events = arm_spe_free_events; spe->auxtrace.free = arm_spe_free; + spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; arm_spe_print_info(&auxtrace_info->priv[0]); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 3571ce72ca28..749487a41cc7 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -33,6 +33,7 @@ #include "evsel.h" #include "evsel_config.h" #include "symbol.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "thread_map.h" #include "asm/bug.h" @@ -58,25 +59,6 @@ #include "symbol/kallsyms.h" #include <internal/lib.h> -static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel) -{ - struct perf_pmu *pmu = NULL; - - while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu->type == evsel->core.attr.type) - break; - } - - return pmu; -} - -static bool perf_evsel__is_aux_event(struct evsel *evsel) -{ - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); - - return pmu && pmu->auxtrace; -} - /* * Make a group from 'leader' to 'last', requiring that the events were not * already grouped to a different leader. @@ -88,7 +70,7 @@ static int perf_evlist__regroup(struct evlist *evlist, struct evsel *evsel; bool grp; - if (!perf_evsel__is_group_leader(leader)) + if (!evsel__is_group_leader(leader)) return -EINVAL; grp = false; @@ -703,8 +685,8 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist, evlist__for_each_entry(evlist, evsel) { sz = evsel->core.attr.aux_sample_size; - if (perf_evsel__is_group_leader(evsel)) { - has_aux_leader = perf_evsel__is_aux_event(evsel); + if (evsel__is_group_leader(evsel)) { + has_aux_leader = evsel__is_aux_event(evsel); if (sz) { if (has_aux_leader) pr_err("Cannot add AUX area sampling to an AUX area event\n"); @@ -723,10 +705,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist, pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n"); return -EINVAL; } - perf_evsel__set_sample_bit(evsel, AUX); + evsel__set_sample_bit(evsel, AUX); opts->auxtrace_sample_mode = true; } else { - perf_evsel__reset_sample_bit(evsel, AUX); + evsel__reset_sample_bit(evsel, AUX); } } @@ -777,8 +759,8 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr, /* Set aux_sample_size based on --aux-sample option */ evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_group_leader(evsel)) { - has_aux_leader = perf_evsel__is_aux_event(evsel); + if (evsel__is_group_leader(evsel)) { + has_aux_leader = evsel__is_aux_event(evsel); } else if (has_aux_leader) { evsel->core.attr.aux_sample_size = sz; } @@ -787,7 +769,7 @@ no_opt: aux_evsel = NULL; /* Override with aux_sample_size from config term */ evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_aux_event(evsel)) + if (evsel__is_aux_event(evsel)) aux_evsel = evsel; term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE); if (term) { @@ -1234,29 +1216,79 @@ out_free: return err; } +static void unleader_evsel(struct evlist *evlist, struct evsel *leader) +{ + struct evsel *new_leader = NULL; + struct evsel *evsel; + + /* Find new leader for the group */ + evlist__for_each_entry(evlist, evsel) { + if (evsel->leader != leader || evsel == leader) + continue; + if (!new_leader) + new_leader = evsel; + evsel->leader = new_leader; + } + + /* Update group information */ + if (new_leader) { + zfree(&new_leader->group_name); + new_leader->group_name = leader->group_name; + leader->group_name = NULL; + + new_leader->core.nr_members = leader->core.nr_members - 1; + leader->core.nr_members = 1; + } +} + +static void unleader_auxtrace(struct perf_session *session) +{ + struct evsel *evsel; + + evlist__for_each_entry(session->evlist, evsel) { + if (auxtrace__evsel_is_auxtrace(session, evsel) && + evsel__is_group_leader(evsel)) { + unleader_evsel(session->evlist, evsel); + } + } +} + int perf_event__process_auxtrace_info(struct perf_session *session, union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; + int err; if (dump_trace) fprintf(stdout, " type: %u\n", type); switch (type) { case PERF_AUXTRACE_INTEL_PT: - return intel_pt_process_auxtrace_info(event, session); + err = intel_pt_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_INTEL_BTS: - return intel_bts_process_auxtrace_info(event, session); + err = intel_bts_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_ARM_SPE: - return arm_spe_process_auxtrace_info(event, session); + err = arm_spe_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_CS_ETM: - return cs_etm__process_auxtrace_info(event, session); + err = cs_etm__process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_S390_CPUMSF: - return s390_cpumsf_process_auxtrace_info(event, session); + err = s390_cpumsf_process_auxtrace_info(event, session); + break; case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; } + + if (err) + return err; + + unleader_auxtrace(session); + + return 0; } s64 perf_event__process_auxtrace(struct perf_session *session, @@ -1412,8 +1444,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->branches = true; synth_opts->returns = true; break; + case 'G': case 'g': - synth_opts->callchain = true; + if (p[-1] == 'G') + synth_opts->add_callchain = true; + else + synth_opts->callchain = true; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; while (*p == ' ' || *p == ',') @@ -1428,8 +1464,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->callchain_sz = val; } break; + case 'L': case 'l': - synth_opts->last_branch = true; + if (p[-1] == 'L') + synth_opts->add_last_branch = true; + else + synth_opts->last_branch = true; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; while (*p == ' ' || *p == ',') @@ -2482,7 +2522,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter, goto out_exit; } - if (perf_evsel__append_addr_filter(evsel, new_filter)) { + if (evsel__append_addr_filter(evsel, new_filter)) { err = -ENOMEM; goto out_exit; } @@ -2500,9 +2540,9 @@ out_exit: return err; } -static int perf_evsel__nr_addr_filter(struct evsel *evsel) +static int evsel__nr_addr_filter(struct evsel *evsel) { - struct perf_pmu *pmu = perf_evsel__find_pmu(evsel); + struct perf_pmu *pmu = evsel__find_pmu(evsel); int nr_addr_filters = 0; if (!pmu) @@ -2521,7 +2561,7 @@ int auxtrace_parse_filters(struct evlist *evlist) evlist__for_each_entry(evlist, evsel) { filter = evsel->filter; - max_nr = perf_evsel__nr_addr_filter(evsel); + max_nr = evsel__nr_addr_filter(evsel); if (!filter || !max_nr) continue; evsel->filter = NULL; @@ -2577,3 +2617,12 @@ void auxtrace__free(struct perf_session *session) return session->auxtrace->free(session); } + +bool auxtrace__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace) + return false; + + return session->auxtrace->evsel_is_auxtrace(session, evsel); +} diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e58ef160b599..0220a2e86c16 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -21,6 +21,7 @@ union perf_event; struct perf_session; struct evlist; +struct evsel; struct perf_tool; struct mmap; struct perf_sample; @@ -73,8 +74,10 @@ enum itrace_period_type { * @calls: limit branch samples to calls (can be combined with @returns) * @returns: limit branch samples to returns (can be combined with @calls) * @callchain: add callchain to 'instructions' events + * @add_callchain: add callchain to existing event records * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events + * @add_last_branch: add branch context to existing event records * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -100,8 +103,10 @@ struct itrace_synth_opts { bool calls; bool returns; bool callchain; + bool add_callchain; bool thread_stack; bool last_branch; + bool add_last_branch; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; @@ -166,6 +171,8 @@ struct auxtrace { struct perf_tool *tool); void (*free_events)(struct perf_session *session); void (*free)(struct perf_session *session); + bool (*evsel_is_auxtrace)(struct perf_session *session, + struct evsel *evsel); }; /** @@ -584,6 +591,8 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session, int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool); void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); +bool auxtrace__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel); #define ITRACE_HELP \ " i: synthesize instructions events\n" \ @@ -750,6 +759,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused) } static inline +bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused, + struct evsel *evsel __maybe_unused) +{ + return false; +} + +static inline int auxtrace_parse_filters(struct evlist *evlist __maybe_unused) { return 0; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index a3207d900339..3742511a08d1 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -6,6 +6,9 @@ #include <bpf/libbpf.h> #include <linux/btf.h> #include <linux/err.h> +#include <linux/string.h> +#include <internal/lib.h> +#include <symbol/kallsyms.h> #include "bpf-event.h" #include "debug.h" #include "dso.h" @@ -290,11 +293,82 @@ out: return err ? -1 : 0; } +struct kallsyms_parse { + union perf_event *event; + perf_event__handler_t process; + struct machine *machine; + struct perf_tool *tool; +}; + +static int +process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data) +{ + struct machine *machine = data->machine; + union perf_event *event = data->event; + struct perf_record_ksymbol *ksymbol; + int len; + + ksymbol = &event->ksymbol; + + *ksymbol = (struct perf_record_ksymbol) { + .header = { + .type = PERF_RECORD_KSYMBOL, + .size = offsetof(struct perf_record_ksymbol, name), + }, + .addr = addr, + .len = page_size, + .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, + .flags = 0, + }; + + len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name); + ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64)); + memset((void *) event + event->header.size, 0, machine->id_hdr_size); + event->header.size += machine->id_hdr_size; + + return perf_tool__process_synth_event(data->tool, event, machine, + data->process); +} + +static int +kallsyms_process_symbol(void *data, const char *_name, + char type __maybe_unused, u64 start) +{ + char disp[KSYM_NAME_LEN]; + char *module, *name; + unsigned long id; + int err = 0; + + module = strchr(_name, '\t'); + if (!module) + return 0; + + /* We are going after [bpf] module ... */ + if (strcmp(module + 1, "[bpf]")) + return 0; + + name = memdup(_name, (module - _name) + 1); + if (!name) + return -ENOMEM; + + name[module - _name] = 0; + + /* .. and only for trampolines and dispatchers */ + if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) || + (sscanf(name, "bpf_dispatcher_%s", disp) == 1)) + err = process_bpf_image(name, start, data); + + free(name); + return err; +} + int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts) { + const char *kallsyms_filename = "/proc/kallsyms"; + struct kallsyms_parse arg; union perf_event *event; __u32 id = 0; int err; @@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size); if (!event) return -1; + + /* Synthesize all the bpf programs in system. */ while (true) { err = bpf_prog_get_next_id(id, &id); if (err) { @@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, break; } } + + /* Synthesize all the bpf images - trampolines/dispatchers. */ + if (symbol_conf.kallsyms_name != NULL) + kallsyms_filename = symbol_conf.kallsyms_name; + + arg = (struct kallsyms_parse) { + .event = event, + .process = process, + .machine = machine, + .tool = session->tool, + }; + + if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) { + pr_err("%s: failed to synthesize bpf images: %s\n", + __func__, strerror(errno)); + } + free(event); return err; } @@ -416,8 +509,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data) return 0; } -int bpf_event__add_sb_event(struct evlist **evlist, - struct perf_env *env) +int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) { struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 81fdc88e6c1a..68f315c3df5b 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -33,8 +33,7 @@ struct btf_node { #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); -int bpf_event__add_sb_event(struct evlist **evlist, - struct perf_env *env); +int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, struct perf_env *env, FILE *fp); @@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused, return 0; } -static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused, - struct perf_env *env __maybe_unused) +static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, + struct perf_env *env __maybe_unused) { return 0; } diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 10c187b8b8ea..83bfb8768235 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1430,7 +1430,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; } - if (perf_evsel__is_bpf_output(evsel)) + if (evsel__is_bpf_output(evsel)) check_pass = true; if (attr->type == PERF_TYPE_RAW) check_pass = true; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 154a05cd03af..4d3f02fa223d 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -15,13 +15,18 @@ #include "event.h" struct branch_flags { - u64 mispred:1; - u64 predicted:1; - u64 in_tx:1; - u64 abort:1; - u64 cycles:16; - u64 type:4; - u64 reserved:40; + union { + u64 value; + struct { + u64 mispred:1; + u64 predicted:1; + u64 in_tx:1; + u64 abort:1; + u64 cycles:16; + u64 type:4; + u64 reserved:40; + }; + }; }; struct branch_info { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 706bb7bbe1e1..8f668ee29f25 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -143,6 +143,9 @@ struct callchain_cursor_node { u64 ip; struct map_symbol ms; const char *srcline; + /* Indicate valid cursor node for LBR stitch */ + bool valid; + bool branch; struct branch_flags branch_flags; u64 branch_from; @@ -151,6 +154,11 @@ struct callchain_cursor_node { struct callchain_cursor_node *next; }; +struct stitch_list { + struct list_head node; + struct callchain_cursor_node cursor; +}; + struct callchain_cursor { u64 nr; struct callchain_cursor_node *first; diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h index 051dc590ceee..ae52878c0b2e 100644 --- a/tools/perf/util/cap.h +++ b/tools/perf/util/cap.h @@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused) #define CAP_SYSLOG 34 #endif +#ifndef CAP_PERFMON +#define CAP_PERFMON 38 +#endif + #endif /* __PERF_CAP_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index b73fb7823048..050dea9f1e88 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -107,7 +107,8 @@ found: static void cgroup__delete(struct cgroup *cgroup) { - close(cgroup->fd); + if (cgroup->fd >= 0) + close(cgroup->fd); zfree(&cgroup->name); free(cgroup); } diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index a12872f2856a..6b3988a7aba8 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -28,7 +28,7 @@ int __weak sched_getcpu(void) static int perf_flag_probe(void) { - /* use 'safest' configuration as used in perf_evsel__fallback() */ + /* use 'safest' configuration as used in evsel__fallback() */ struct perf_event_attr attr = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index cd92a99eb89d..cd007cc9c283 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -564,6 +564,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( resp = cs_etm_decoder__set_tid(etmq, packet_queue, elem, trace_chan_id); break; + /* Unused packet types */ + case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH: case OCSD_GEN_TRC_ELEM_ADDR_NACC: case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 62d2f9b9ce1b..c283223fb31f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -94,6 +94,9 @@ struct cs_etm_queue { struct cs_etm_traceid_queue **traceid_queues; }; +/* RB tree for quick conversion between traceID and metadata pointers */ +static struct intlist *traceid_list; + static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, @@ -631,6 +634,16 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + return evsel->core.attr.type == aux->pmu_type; +} + static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) { struct machine *machine; @@ -2618,6 +2631,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.flush_events = cs_etm__flush_events; etm->auxtrace.free_events = cs_etm__free_events; etm->auxtrace.free = cs_etm__free; + etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; session->auxtrace = &etm->auxtrace; etm->unknown_thread = thread__new(999999999, 999999999); diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 650ecc2a6349..4ad925d6d799 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -114,9 +114,6 @@ enum cs_etm_isa { CS_ETM_ISA_T32, }; -/* RB tree for quick conversion between traceID and metadata pointers */ -struct intlist *traceid_list; - struct cs_etm_queue; struct cs_etm_packet { diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index dbc772bfb04e..5f36fc6a5578 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -835,7 +835,7 @@ static int process_sample_event(struct perf_tool *tool, return -1; } - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { ret = add_bpf_output_values(event_class, event, sample); if (ret) return -1; @@ -1155,7 +1155,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel) { struct bt_ctf_event_class *event_class; struct evsel_priv *priv; - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); int ret; pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type); @@ -1174,7 +1174,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel) goto err; } - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { ret = add_bpf_output_types(cw, event_class); if (ret) goto err; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 91f21239608b..f338990e0fe6 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -191,6 +191,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2db64b79617a..9553a1fd9e8a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -40,6 +40,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, DSO_BINARY_TYPE__BPF_PROG_INFO, + DSO_BINARY_TYPE__BPF_IMAGE, DSO_BINARY_TYPE__NOT_FOUND, }; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 7632075a8792..1ab2682d5d2b 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -48,6 +48,7 @@ struct perf_env { char *cpuid; unsigned long long total_mem; unsigned int msr_pmu_type; + unsigned int max_branches; int nr_cmdline; int nr_sibling_cores; @@ -57,12 +58,14 @@ struct perf_env { int nr_memory_nodes; int nr_pmu_mappings; int nr_groups; + int nr_cpu_pmu_caps; char *cmdline; const char **cmdline_argv; char *sibling_cores; char *sibling_dies; char *sibling_threads; char *pmu_mappings; + char *cpu_pmu_caps; struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dc0e11214ae1..f581550a3015 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -626,7 +626,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, ret = strlist__has_entry(symbol_conf.sym_list, al->sym->name); } - if (!(ret && al->sym)) { + if (!ret && al->sym) { snprintf(al_addr_str, sz, "0x%"PRIx64, al->map->unmap_ip(al->map, al->sym->start)); ret = strlist__has_entry(symbol_conf.sym_list, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1548237b6558..0a0b760d6948 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -23,6 +23,7 @@ #include "asm/bug.h" #include "bpf-event.h" #include "util/string2.h" +#include "util/perf_api_probe.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -118,7 +119,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - perf_evsel__calc_id_pos(evsel); + evsel__calc_id_pos(evsel); perf_evlist__set_id_pos(evlist); } @@ -390,14 +391,14 @@ void evlist__disable(struct evlist *evlist) evlist__for_each_entry(evlist, pos) { if (evsel__cpu_iter_skip(pos, cpu)) continue; - if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) continue; evsel__disable_cpu(pos, pos->cpu_iter - 1); } } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = true; } @@ -420,14 +421,14 @@ void evlist__enable(struct evlist *evlist) evlist__for_each_entry(evlist, pos) { if (evsel__cpu_iter_skip(pos, cpu)) continue; - if (!perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; evsel__enable_cpu(pos, pos->cpu_iter - 1); } } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->core.fd) + if (!evsel__is_group_leader(pos) || !pos->core.fd) continue; pos->disabled = false; } @@ -947,7 +948,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - __perf_evsel__set_sample_bit(evsel, bit); + __evsel__set_sample_bit(evsel, bit); } void __perf_evlist__reset_sample_bit(struct evlist *evlist, @@ -956,7 +957,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist, struct evsel *evsel; evlist__for_each_entry(evlist, evsel) - __perf_evsel__reset_sample_bit(evsel, bit); + __evsel__reset_sample_bit(evsel, bit); } int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel) @@ -994,7 +995,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter) if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; - err = perf_evsel__set_filter(evsel, filter); + err = evsel__set_filter(evsel, filter); if (err) break; } @@ -1014,7 +1015,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter) if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) continue; - err = perf_evsel__append_tp_filter(evsel, filter); + err = evsel__append_tp_filter(evsel, filter); if (err) break; } @@ -1131,8 +1132,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist) u64 sample_type = first->core.attr.sample_type; evlist__for_each_entry(evlist, pos) { - if (read_format != pos->core.attr.read_format) - return false; + if (read_format != pos->core.attr.read_format) { + pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n", + read_format, (u64)pos->core.attr.read_format); + } } /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ @@ -1436,7 +1439,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event, if (!evsel) return -EFAULT; - return perf_evsel__parse_sample(evsel, event, sample); + return evsel__parse_sample(evsel, event, sample); } int perf_evlist__parse_sample_timestamp(struct evlist *evlist, @@ -1447,7 +1450,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist, if (!evsel) return -EFAULT; - return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); + return evsel__parse_sample_timestamp(evsel, event, timestamp); } int perf_evlist__strerror_open(struct evlist *evlist, @@ -1701,133 +1704,3 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, } return leader; } - -int perf_evlist__add_sb_event(struct evlist **evlist, - struct perf_event_attr *attr, - perf_evsel__sb_cb_t cb, - void *data) -{ - struct evsel *evsel; - bool new_evlist = (*evlist) == NULL; - - if (*evlist == NULL) - *evlist = evlist__new(); - if (*evlist == NULL) - return -1; - - if (!attr->sample_id_all) { - pr_warning("enabling sample_id_all for all side band events\n"); - attr->sample_id_all = 1; - } - - evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries); - if (!evsel) - goto out_err; - - evsel->side_band.cb = cb; - evsel->side_band.data = data; - evlist__add(*evlist, evsel); - return 0; - -out_err: - if (new_evlist) { - evlist__delete(*evlist); - *evlist = NULL; - } - return -1; -} - -static void *perf_evlist__poll_thread(void *arg) -{ - struct evlist *evlist = arg; - bool draining = false; - int i, done = 0; - /* - * In order to read symbols from other namespaces perf to needs to call - * setns(2). This isn't permitted if the struct_fs has multiple users. - * unshare(2) the fs so that we may continue to setns into namespaces - * that we're observing when, for instance, reading the build-ids at - * the end of a 'perf record' session. - */ - unshare(CLONE_FS); - - while (!done) { - bool got_data = false; - - if (evlist->thread.done) - draining = true; - - if (!draining) - evlist__poll(evlist, 1000); - - for (i = 0; i < evlist->core.nr_mmaps; i++) { - struct mmap *map = &evlist->mmap[i]; - union perf_event *event; - - if (perf_mmap__read_init(&map->core)) - continue; - while ((event = perf_mmap__read_event(&map->core)) != NULL) { - struct evsel *evsel = perf_evlist__event2evsel(evlist, event); - - if (evsel && evsel->side_band.cb) - evsel->side_band.cb(event, evsel->side_band.data); - else - pr_warning("cannot locate proper evsel for the side band event\n"); - - perf_mmap__consume(&map->core); - got_data = true; - } - perf_mmap__read_done(&map->core); - } - - if (draining && !got_data) - break; - } - return NULL; -} - -int perf_evlist__start_sb_thread(struct evlist *evlist, - struct target *target) -{ - struct evsel *counter; - - if (!evlist) - return 0; - - if (perf_evlist__create_maps(evlist, target)) - goto out_delete_evlist; - - evlist__for_each_entry(evlist, counter) { - if (evsel__open(counter, evlist->core.cpus, - evlist->core.threads) < 0) - goto out_delete_evlist; - } - - if (evlist__mmap(evlist, UINT_MAX)) - goto out_delete_evlist; - - evlist__for_each_entry(evlist, counter) { - if (evsel__enable(counter)) - goto out_delete_evlist; - } - - evlist->thread.done = 0; - if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) - goto out_delete_evlist; - - return 0; - -out_delete_evlist: - evlist__delete(evlist); - evlist = NULL; - return -1; -} - -void perf_evlist__stop_sb_thread(struct evlist *evlist) -{ - if (!evlist) - return; - evlist->thread.done = 1; - pthread_join(evlist->thread.th, NULL); - evlist__delete(evlist); -} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f5bd5c386df1..b6f325dfb4d2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -107,10 +107,11 @@ int __perf_evlist__add_default_attrs(struct evlist *evlist, int perf_evlist__add_dummy(struct evlist *evlist); -int perf_evlist__add_sb_event(struct evlist **evlist, +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, - perf_evsel__sb_cb_t cb, + evsel__sb_cb_t cb, void *data); +void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data); int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target); void perf_evlist__stop_sb_thread(struct evlist *evlist); @@ -173,10 +174,6 @@ void evlist__close(struct evlist *evlist); struct callchain_param; void perf_evlist__set_id_pos(struct evlist *evlist); -bool perf_can_sample_identifier(void); -bool perf_can_record_switch_events(void); -bool perf_can_record_cpu_wide(void); -bool perf_can_aux_sample(void); void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, struct callchain_param *callchain); int record_opts__config(struct record_opts *opts); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index eb880efbce16..f3e60c45d59a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -102,7 +102,7 @@ set_methods: #define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) -int __perf_evsel__sample_size(u64 sample_type) +int __evsel__sample_size(u64 sample_type) { u64 mask = sample_type & PERF_SAMPLE_MASK; int size = 0; @@ -178,53 +178,53 @@ static int __perf_evsel__calc_is_pos(u64 sample_type) return idx; } -void perf_evsel__calc_id_pos(struct evsel *evsel) +void evsel__calc_id_pos(struct evsel *evsel) { evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type); evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type); } -void __perf_evsel__set_sample_bit(struct evsel *evsel, +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit) { if (!(evsel->core.attr.sample_type & bit)) { evsel->core.attr.sample_type |= bit; evsel->sample_size += sizeof(u64); - perf_evsel__calc_id_pos(evsel); + evsel__calc_id_pos(evsel); } } -void __perf_evsel__reset_sample_bit(struct evsel *evsel, +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit) { if (evsel->core.attr.sample_type & bit) { evsel->core.attr.sample_type &= ~bit; evsel->sample_size -= sizeof(u64); - perf_evsel__calc_id_pos(evsel); + evsel__calc_id_pos(evsel); } } -void perf_evsel__set_sample_id(struct evsel *evsel, +void evsel__set_sample_id(struct evsel *evsel, bool can_sample_identifier) { if (can_sample_identifier) { - perf_evsel__reset_sample_bit(evsel, ID); - perf_evsel__set_sample_bit(evsel, IDENTIFIER); + evsel__reset_sample_bit(evsel, ID); + evsel__set_sample_bit(evsel, IDENTIFIER); } else { - perf_evsel__set_sample_bit(evsel, ID); + evsel__set_sample_bit(evsel, ID); } evsel->core.attr.read_format |= PERF_FORMAT_ID; } /** - * perf_evsel__is_function_event - Return whether given evsel is a function + * evsel__is_function_event - Return whether given evsel is a function * trace event * * @evsel - evsel selector to be tested * * Return %true if event is function trace event */ -bool perf_evsel__is_function_event(struct evsel *evsel) +bool evsel__is_function_event(struct evsel *evsel) { #define FUNCTION_EVENT "ftrace:function" @@ -249,8 +249,8 @@ void evsel__init(struct evsel *evsel, evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->config_terms); perf_evsel__object.init(evsel); - evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); - perf_evsel__calc_id_pos(evsel); + evsel->sample_size = __evsel__sample_size(attr->sample_type); + evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; evsel->metric_expr = NULL; evsel->metric_name = NULL; @@ -267,13 +267,13 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return NULL; evsel__init(evsel, attr, idx); - if (perf_evsel__is_bpf_output(evsel)) { + if (evsel__is_bpf_output(evsel)) { evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->core.attr.sample_period = 1; } - if (perf_evsel__is_clock(evsel)) { + if (evsel__is_clock(evsel)) { /* * The evsel->unit points to static alias->unit * so it's ok to use static string in here. @@ -385,7 +385,7 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = { "ref-cycles", }; -static const char *__perf_evsel__hw_name(u64 config) +static const char *__evsel__hw_name(u64 config) { if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config]) return perf_evsel__hw_names[config]; @@ -429,9 +429,9 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) return r; } -static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size) { - int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config)); + int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config)); return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } @@ -448,20 +448,20 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = { "dummy", }; -static const char *__perf_evsel__sw_name(u64 config) +static const char *__evsel__sw_name(u64 config) { if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config]) return perf_evsel__sw_names[config]; return "unknown-software"; } -static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) { - int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config)); + int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config)); return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } -static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) +static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) { int r; @@ -479,10 +479,10 @@ static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) return r; } -static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size) { struct perf_event_attr *attr = &evsel->core.attr; - int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); + int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type); return r + perf_evsel__add_modifiers(evsel, bf + r, size - r); } @@ -531,7 +531,7 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = { [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH), }; -bool perf_evsel__is_cache_op_valid(u8 type, u8 op) +bool evsel__is_cache_op_valid(u8 type, u8 op) { if (perf_evsel__hw_cache_stat[type] & COP(op)) return true; /* valid */ @@ -539,8 +539,7 @@ bool perf_evsel__is_cache_op_valid(u8 type, u8 op) return false; /* invalid */ } -int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, - char *bf, size_t size) +int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size) { if (result) { return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0], @@ -552,7 +551,7 @@ int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, perf_evsel__hw_cache_op[op][1]); } -static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) +static int __evsel__hw_cache_name(u64 config, char *bf, size_t size) { u8 op, result, type = (config >> 0) & 0xff; const char *err = "unknown-ext-hardware-cache-type"; @@ -571,33 +570,33 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) goto out_err; err = "invalid-cache"; - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) goto out_err; - return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size); + return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size); out_err: return scnprintf(bf, size, "%s", err); } -static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size) { - int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size); + int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size); return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } -static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size) +static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size) { int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config); return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret); } -static int perf_evsel__tool_name(char *bf, size_t size) +static int evsel__tool_name(char *bf, size_t size) { int ret = scnprintf(bf, size, "duration_time"); return ret; } -const char *perf_evsel__name(struct evsel *evsel) +const char *evsel__name(struct evsel *evsel) { char bf[128]; @@ -609,22 +608,22 @@ const char *perf_evsel__name(struct evsel *evsel) switch (evsel->core.attr.type) { case PERF_TYPE_RAW: - perf_evsel__raw_name(evsel, bf, sizeof(bf)); + evsel__raw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_HARDWARE: - perf_evsel__hw_name(evsel, bf, sizeof(bf)); + evsel__hw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_HW_CACHE: - perf_evsel__hw_cache_name(evsel, bf, sizeof(bf)); + evsel__hw_cache_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_SOFTWARE: if (evsel->tool_event) - perf_evsel__tool_name(bf, sizeof(bf)); + evsel__tool_name(bf, sizeof(bf)); else - perf_evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -632,7 +631,7 @@ const char *perf_evsel__name(struct evsel *evsel) break; case PERF_TYPE_BREAKPOINT: - perf_evsel__bp_name(evsel, bf, sizeof(bf)); + evsel__bp_name(evsel, bf, sizeof(bf)); break; default: @@ -649,7 +648,7 @@ out_unknown: return "unknown"; } -const char *perf_evsel__group_name(struct evsel *evsel) +const char *evsel__group_name(struct evsel *evsel) { return evsel->group_name ?: "anon group"; } @@ -664,21 +663,19 @@ const char *perf_evsel__group_name(struct evsel *evsel) * For record -e 'cycles,instructions' and report --group * 'cycles:u, instructions:u' */ -int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size) +int evsel__group_desc(struct evsel *evsel, char *buf, size_t size) { int ret = 0; struct evsel *pos; - const char *group_name = perf_evsel__group_name(evsel); + const char *group_name = evsel__group_name(evsel); if (!evsel->forced_leader) ret = scnprintf(buf, size, "%s { ", group_name); - ret += scnprintf(buf + ret, size - ret, "%s", - perf_evsel__name(evsel)); + ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel)); for_each_group_member(pos, evsel) - ret += scnprintf(buf + ret, size - ret, ", %s", - perf_evsel__name(pos)); + ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos)); if (!evsel->forced_leader) ret += scnprintf(buf + ret, size - ret, " }"); @@ -686,14 +683,13 @@ int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size) return ret; } -static void __perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *param) { - bool function = perf_evsel__is_function_event(evsel); + bool function = evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - perf_evsel__set_sample_bit(evsel, CALLCHAIN); + evsel__set_sample_bit(evsel, CALLCHAIN); attr->sample_max_stack = param->max_stack; @@ -708,7 +704,7 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, "to get user callchain information. " "Falling back to framepointers.\n"); } else { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | @@ -722,8 +718,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { - perf_evsel__set_sample_bit(evsel, REGS_USER); - perf_evsel__set_sample_bit(evsel, STACK_USER); + evsel__set_sample_bit(evsel, REGS_USER); + evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) { attr->sample_regs_user |= DWARF_MINIMAL_REGS; pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, " @@ -746,12 +742,11 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, } } -void perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *param) { if (param->enabled) - return __perf_evsel__config_callchain(evsel, opts, param); + return __evsel__config_callchain(evsel, opts, param); } static void @@ -760,16 +755,16 @@ perf_evsel__reset_callgraph(struct evsel *evsel, { struct perf_event_attr *attr = &evsel->core.attr; - perf_evsel__reset_sample_bit(evsel, CALLCHAIN); + evsel__reset_sample_bit(evsel, CALLCHAIN); if (param->record_mode == CALLCHAIN_LBR) { - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { - perf_evsel__reset_sample_bit(evsel, REGS_USER); - perf_evsel__reset_sample_bit(evsel, STACK_USER); + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, STACK_USER); } } @@ -793,32 +788,32 @@ static void apply_config_terms(struct evsel *evsel, if (!(term->weak && opts->user_interval != ULLONG_MAX)) { attr->sample_period = term->val.period; attr->freq = 0; - perf_evsel__reset_sample_bit(evsel, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); } break; case PERF_EVSEL__CONFIG_TERM_FREQ: if (!(term->weak && opts->user_freq != UINT_MAX)) { attr->sample_freq = term->val.freq; attr->freq = 1; - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); } break; case PERF_EVSEL__CONFIG_TERM_TIME: if (term->val.time) - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, TIME); else - perf_evsel__reset_sample_bit(evsel, TIME); + evsel__reset_sample_bit(evsel, TIME); break; case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: callgraph_buf = term->val.str; break; case PERF_EVSEL__CONFIG_TERM_BRANCH: if (term->val.str && strcmp(term->val.str, "no")) { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); parse_branch_str(term->val.str, &attr->branch_sample_type); } else - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); break; case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; @@ -832,7 +827,7 @@ static void apply_config_terms(struct evsel *evsel, case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by - * perf_evsel__config. If user explicitly set + * evsel__config. If user explicitly set * inherit using config terms, override global * opt->no_inherit setting. */ @@ -897,11 +892,11 @@ static void apply_config_terms(struct evsel *evsel, /* set perf-event callgraph */ if (param.enabled) { if (sample_address) { - perf_evsel__set_sample_bit(evsel, ADDR); - perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel__set_sample_bit(evsel, ADDR); + evsel__set_sample_bit(evsel, DATA_SRC); evsel->core.attr.mmap_data = track; } - perf_evsel__config_callchain(evsel, opts, ¶m); + evsel__config_callchain(evsel, opts, ¶m); } } } @@ -953,8 +948,8 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel * enable/disable events specifically, as there's no * initial traced exec call. */ -void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, - struct callchain_param *callchain) +void evsel__config(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain) { struct evsel *leader = evsel->leader; struct perf_event_attr *attr = &evsel->core.attr; @@ -965,17 +960,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, attr->inherit = !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; - perf_evsel__set_sample_bit(evsel, IP); - perf_evsel__set_sample_bit(evsel, TID); + evsel__set_sample_bit(evsel, IP); + evsel__set_sample_bit(evsel, TID); if (evsel->sample_read) { - perf_evsel__set_sample_bit(evsel, READ); + evsel__set_sample_bit(evsel, READ); /* * We need ID even in case of single event, because * PERF_SAMPLE_READ process ID specific data. */ - perf_evsel__set_sample_id(evsel, false); + evsel__set_sample_id(evsel, false); /* * Apply group format only if we belong to group @@ -994,7 +989,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (!attr->sample_period || (opts->user_freq != UINT_MAX || opts->user_interval != ULLONG_MAX)) { if (opts->freq) { - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); attr->freq = 1; attr->sample_freq = opts->freq; } else { @@ -1002,25 +997,6 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, } } - /* - * Disable sampling for all group members other - * than leader in case leader 'leads' the sampling. - */ - if ((leader != evsel) && leader->sample_read) { - attr->freq = 0; - attr->sample_freq = 0; - attr->sample_period = 0; - attr->write_backward = 0; - - /* - * We don't get sample for slave events, we make them - * when delivering group leader sample. Set the slave - * event to follow the master sample_type to ease up - * report. - */ - attr->sample_type = leader->core.attr.sample_type; - } - if (opts->no_samples) attr->sample_freq = 0; @@ -1033,7 +1009,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, } if (opts->sample_address) { - perf_evsel__set_sample_bit(evsel, ADDR); + evsel__set_sample_bit(evsel, ADDR); attr->mmap_data = track; } @@ -1042,24 +1018,24 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * event, due to issues with page faults while tracing page * fault handler and its overall trickiness nature. */ - if (perf_evsel__is_function_event(evsel)) + if (evsel__is_function_event(evsel)) evsel->core.attr.exclude_callchain_user = 1; if (callchain && callchain->enabled && !evsel->no_aux_samples) - perf_evsel__config_callchain(evsel, opts, callchain); + evsel__config_callchain(evsel, opts, callchain); if (opts->sample_intr_regs) { attr->sample_regs_intr = opts->sample_intr_regs; - perf_evsel__set_sample_bit(evsel, REGS_INTR); + evsel__set_sample_bit(evsel, REGS_INTR); } if (opts->sample_user_regs) { attr->sample_regs_user |= opts->sample_user_regs; - perf_evsel__set_sample_bit(evsel, REGS_USER); + evsel__set_sample_bit(evsel, REGS_USER); } if (target__has_cpu(&opts->target) || opts->sample_cpu) - perf_evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, CPU); /* * When the user explicitly disabled time don't force it here. @@ -1068,31 +1044,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, (!perf_missing_features.sample_id_all && (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || opts->sample_time_set))) - perf_evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, TIME); if (opts->raw_samples && !evsel->no_aux_samples) { - perf_evsel__set_sample_bit(evsel, TIME); - perf_evsel__set_sample_bit(evsel, RAW); - perf_evsel__set_sample_bit(evsel, CPU); + evsel__set_sample_bit(evsel, TIME); + evsel__set_sample_bit(evsel, RAW); + evsel__set_sample_bit(evsel, CPU); } if (opts->sample_address) - perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) - perf_evsel__set_sample_bit(evsel, PHYS_ADDR); + evsel__set_sample_bit(evsel, PHYS_ADDR); if (opts->no_buffering) { attr->watermark = 0; attr->wakeup_events = 1; } if (opts->branch_stack && !evsel->no_aux_samples) { - perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = opts->branch_stack; } if (opts->sample_weight) - perf_evsel__set_sample_bit(evsel, WEIGHT); + evsel__set_sample_bit(evsel, WEIGHT); attr->task = track; attr->mmap = track; @@ -1106,14 +1082,14 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, if (opts->record_cgroup) { attr->cgroup = track && !perf_missing_features.cgroup; - perf_evsel__set_sample_bit(evsel, CGROUP); + evsel__set_sample_bit(evsel, CGROUP); } if (opts->record_switch_events) attr->context_switch = track; if (opts->sample_transaction) - perf_evsel__set_sample_bit(evsel, TRANSACTION); + evsel__set_sample_bit(evsel, TRANSACTION); if (opts->running_time) { evsel->core.attr.read_format |= @@ -1127,15 +1103,15 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * Disabling only independent events or group leaders, * keeping group members enabled. */ - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) attr->disabled = 1; /* * Setting enable_on_exec for independent events and * group leaders for traced executed by perf. */ - if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) && - !opts->initial_delay) + if (target__none(&opts->target) && evsel__is_group_leader(evsel) && + !opts->initial_delay) attr->enable_on_exec = 1; if (evsel->immediate) { @@ -1176,9 +1152,9 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, /* The --period option takes the precedence. */ if (opts->period_set) { if (opts->period) - perf_evsel__set_sample_bit(evsel, PERIOD); + evsel__set_sample_bit(evsel, PERIOD); else - perf_evsel__reset_sample_bit(evsel, PERIOD); + evsel__reset_sample_bit(evsel, PERIOD); } /* @@ -1187,10 +1163,10 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts, * if BRANCH_STACK bit is set. */ if (opts->initial_delay && is_dummy_event(evsel)) - perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); + evsel__reset_sample_bit(evsel, BRANCH_STACK); } -int perf_evsel__set_filter(struct evsel *evsel, const char *filter) +int evsel__set_filter(struct evsel *evsel, const char *filter) { char *new_filter = strdup(filter); @@ -1203,13 +1179,12 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter) return -1; } -static int perf_evsel__append_filter(struct evsel *evsel, - const char *fmt, const char *filter) +static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter) { char *new_filter; if (evsel->filter == NULL) - return perf_evsel__set_filter(evsel, filter); + return evsel__set_filter(evsel, filter); if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) { free(evsel->filter); @@ -1220,14 +1195,14 @@ static int perf_evsel__append_filter(struct evsel *evsel, return -1; } -int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter) +int evsel__append_tp_filter(struct evsel *evsel, const char *filter) { - return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter); + return evsel__append_filter(evsel, "(%s) && (%s)", filter); } -int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter) +int evsel__append_addr_filter(struct evsel *evsel, const char *filter) { - return perf_evsel__append_filter(evsel, "%s,%s", filter); + return evsel__append_filter(evsel, "%s,%s", filter); } /* Caller has to clear disabled after going through all CPUs. */ @@ -1278,7 +1253,7 @@ static void perf_evsel__free_config_terms(struct evsel *evsel) } } -void perf_evsel__exit(struct evsel *evsel) +void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); @@ -1298,12 +1273,12 @@ void perf_evsel__exit(struct evsel *evsel) void evsel__delete(struct evsel *evsel) { - perf_evsel__exit(evsel); + evsel__exit(evsel); free(evsel); } -void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, - struct perf_counts_values *count) +void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -1342,8 +1317,7 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } -static int -perf_evsel__read_one(struct evsel *evsel, int cpu, int thread) +static int evsel__read_one(struct evsel *evsel, int cpu, int thread) { struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); @@ -1403,8 +1377,7 @@ perf_evsel__process_group_data(struct evsel *leader, return 0; } -static int -perf_evsel__read_group(struct evsel *leader, int cpu, int thread) +static int evsel__read_group(struct evsel *leader, int cpu, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; @@ -1414,7 +1387,7 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread) if (!(read_format & PERF_FORMAT_ID)) return -EINVAL; - if (!perf_evsel__is_group_leader(leader)) + if (!evsel__is_group_leader(leader)) return -EINVAL; if (!data) { @@ -1434,18 +1407,17 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread) return perf_evsel__process_group_data(leader, cpu, thread, data); } -int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread) +int evsel__read_counter(struct evsel *evsel, int cpu, int thread) { u64 read_format = evsel->core.attr.read_format; if (read_format & PERF_FORMAT_GROUP) - return perf_evsel__read_group(evsel, cpu, thread); - else - return perf_evsel__read_one(evsel, cpu, thread); + return evsel__read_group(evsel, cpu, thread); + + return evsel__read_one(evsel, cpu, thread); } -int __perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread, bool scale) +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) { struct perf_counts_values count; size_t nv = scale ? 3 : 1; @@ -1459,7 +1431,7 @@ int __perf_evsel__read_on_cpu(struct evsel *evsel, if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) return -errno; - perf_evsel__compute_deltas(evsel, cpu, thread, &count); + evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); *perf_counts(evsel->counts, cpu, thread) = count; return 0; @@ -1470,7 +1442,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) struct evsel *leader = evsel->leader; int fd; - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) return -1; /* @@ -1749,8 +1721,7 @@ retry_open: /* * If we succeeded but had to kill clockid, fail and - * have perf_evsel__open_strerror() print us a nice - * error. + * have evsel__open_strerror() print us a nice error. */ if (perf_missing_features.clockid || perf_missing_features.clockid_wrong) { @@ -1854,7 +1825,7 @@ try_fallback: } else if (!perf_missing_features.group_read && evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - perf_evsel__is_group_leader(evsel)) { + evsel__is_group_leader(evsel)) { perf_missing_features.group_read = true; pr_debug2_peo("switching off group read\n"); goto fallback_missing_features; @@ -1888,9 +1859,7 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus, - int cpu) +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu) { if (cpu == -1) return evsel__open_cpu(evsel, cpus, NULL, 0, @@ -1899,8 +1868,7 @@ int perf_evsel__open_per_cpu(struct evsel *evsel, return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); } -int perf_evsel__open_per_thread(struct evsel *evsel, - struct perf_thread_map *threads) +int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) { return evsel__open(evsel, NULL, threads); } @@ -1995,8 +1963,8 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size) return 0; } -int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data) { u64 type = evsel->core.attr.sample_type; bool swapped = evsel->needs_swap; @@ -2136,7 +2104,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, } } - if (evsel__has_callchain(evsel)) { + if (type & PERF_SAMPLE_CALLCHAIN) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); OVERFLOW_CHECK_u64(array); @@ -2190,7 +2158,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, return -EFAULT; sz = data->branch_stack->nr * sizeof(struct branch_entry); - if (perf_evsel__has_branch_hw_idx(evsel)) + if (evsel__has_branch_hw_idx(evsel)) sz += sizeof(u64); else data->no_hw_idx = true; @@ -2298,9 +2266,8 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, return 0; } -int perf_evsel__parse_sample_timestamp(struct evsel *evsel, - union perf_event *event, - u64 *timestamp) +int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, + u64 *timestamp) { u64 type = evsel->core.attr.sample_type; const __u64 *array; @@ -2342,15 +2309,14 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel, return 0; } -struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name) +struct tep_format_field *evsel__field(struct evsel *evsel, const char *name) { return tep_find_field(evsel->tp_format, name); } -void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, - const char *name) +void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name) { - struct tep_format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = evsel__field(evsel, name); int offset; if (!field) @@ -2405,10 +2371,9 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam return 0; } -u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, - const char *name) +u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name) { - struct tep_format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = evsel__field(evsel, name); if (!field) return 0; @@ -2416,8 +2381,7 @@ u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, return field ? format_field__intval(field, sample, evsel->needs_swap) : 0; } -bool perf_evsel__fallback(struct evsel *evsel, int err, - char *msg, size_t msgsize) +bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize) { int paranoid; @@ -2442,10 +2406,14 @@ bool perf_evsel__fallback(struct evsel *evsel, int err, return true; } else if (err == EACCES && !evsel->core.attr.exclude_kernel && (paranoid = perf_event_paranoid()) > 1) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); char *new_name; const char *sep = ":"; + /* If event has exclude user then don't exclude kernel. */ + if (evsel->core.attr.exclude_user) + return false; + /* Is there already the separator in the name. */ if (strchr(name, '/') || strchr(name, ':')) @@ -2505,8 +2473,8 @@ static bool find_process(const char *name) return ret ? false : true; } -int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, - int err, char *msg, size_t size) +int evsel__open_strerror(struct evsel *evsel, struct target *target, + int err, char *msg, size_t size) { char sbuf[STRERR_BUFSIZE]; int printed = 0; @@ -2516,28 +2484,26 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, case EACCES: if (err == EPERM) printed = scnprintf(msg, size, - "No permission to enable %s event.\n\n", - perf_evsel__name(evsel)); + "No permission to enable %s event.\n\n", evsel__name(evsel)); return scnprintf(msg + printed, size - printed, "You may not have permission to collect %sstats.\n\n" "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" "which controls use of the performance events system by\n" - "unprivileged users (without CAP_SYS_ADMIN).\n\n" + "unprivileged users (without CAP_PERFMON or CAP_SYS_ADMIN).\n\n" "The current value is %d:\n\n" " -1: Allow use of (almost) all events by all users\n" " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n" - ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n" - " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n" - ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" - ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n" + ">= 0: Disallow ftrace function tracepoint by users without CAP_PERFMON or CAP_SYS_ADMIN\n" + " Disallow raw tracepoint access by users without CAP_SYS_PERFMON or CAP_SYS_ADMIN\n" + ">= 1: Disallow CPU event access by users without CAP_PERFMON or CAP_SYS_ADMIN\n" + ">= 2: Disallow kernel profiling by users without CAP_PERFMON or CAP_SYS_ADMIN\n\n" "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n" " kernel.perf_event_paranoid = -1\n" , target->system_wide ? "system-wide " : "", perf_event_paranoid()); case ENOENT: - return scnprintf(msg, size, "The %s event is not supported.", - perf_evsel__name(evsel)); + return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel)); case EMFILE: return scnprintf(msg, size, "%s", "Too many events are opened.\n" @@ -2561,7 +2527,7 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, if (evsel->core.attr.sample_period != 0) return scnprintf(msg, size, "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", - perf_evsel__name(evsel)); + evsel__name(evsel)); if (evsel->core.attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); @@ -2594,11 +2560,10 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" "/bin/dmesg | grep -i perf may provide additional information.\n", - err, str_error_r(err, sbuf, sizeof(sbuf)), - perf_evsel__name(evsel)); + err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } -struct perf_env *perf_evsel__env(struct evsel *evsel) +struct perf_env *evsel__env(struct evsel *evsel) { if (evsel && evsel->evlist) return evsel->evlist->env; @@ -2623,7 +2588,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) return 0; } -int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist) +int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) { struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 53187c501ee8..351c0aaf2a11 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -18,7 +18,7 @@ struct perf_counts; struct perf_stat_evsel; union perf_event; -typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); +typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); enum perf_tool_event { PERF_TOOL_NONE = 0, @@ -101,9 +101,17 @@ struct evsel { int cpu_iter; const char *pmu_name; struct { - perf_evsel__sb_cb_t *cb; - void *data; + evsel__sb_cb_t *cb; + void *data; } side_band; + /* + * For reporting purposes, an evsel sample can have a callchain + * synthesized from AUX area data. Keep track of synthesized sample + * types here. Note, the recorded sample_type cannot be changed because + * it is needed to continue to parse events. + * See also evsel__has_callchain(). + */ + __u64 synth_sample_type; }; struct perf_missing_features { @@ -135,7 +143,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel) return perf_evsel__cpus(&evsel->core); } -static inline int perf_evsel__nr_cpus(struct evsel *evsel) +static inline int evsel__nr_cpus(struct evsel *evsel) { return evsel__cpus(evsel)->nr; } @@ -143,13 +151,16 @@ static inline int perf_evsel__nr_cpus(struct evsel *evsel) void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, - struct perf_counts_values *count); +void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); int perf_evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel), void (*fini)(struct evsel *evsel)); +struct perf_pmu *evsel__find_pmu(struct evsel *evsel); +bool evsel__is_aux_event(struct evsel *evsel); + struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx); static inline struct evsel *evsel__new(struct perf_event_attr *attr) @@ -172,22 +183,20 @@ struct evsel *perf_evsel__new_cycles(bool precise); struct tep_event *event_format__new(const char *sys, const char *name); void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx); -void perf_evsel__exit(struct evsel *evsel); +void evsel__exit(struct evsel *evsel); void evsel__delete(struct evsel *evsel); struct callchain_param; -void perf_evsel__config(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *callchain); -void perf_evsel__config_callchain(struct evsel *evsel, - struct record_opts *opts, - struct callchain_param *callchain); +void evsel__config(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain); +void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, + struct callchain_param *callchain); -int __perf_evsel__sample_size(u64 sample_type); -void perf_evsel__calc_id_pos(struct evsel *evsel); +int __evsel__sample_size(u64 sample_type); +void evsel__calc_id_pos(struct evsel *evsel); -bool perf_evsel__is_cache_op_valid(u8 type, u8 op); +bool evsel__is_cache_op_valid(u8 type, u8 op); #define PERF_EVSEL__MAX_ALIASES 8 @@ -199,177 +208,153 @@ extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX] [PERF_EVSEL__MAX_ALIASES]; extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX]; extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX]; -int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, - char *bf, size_t size); -const char *perf_evsel__name(struct evsel *evsel); +int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); +const char *evsel__name(struct evsel *evsel); -const char *perf_evsel__group_name(struct evsel *evsel); -int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size); +const char *evsel__group_name(struct evsel *evsel); +int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); -void __perf_evsel__set_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); -void __perf_evsel__reset_sample_bit(struct evsel *evsel, - enum perf_event_sample_format bit); +void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); +void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit); -#define perf_evsel__set_sample_bit(evsel, bit) \ - __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) +#define evsel__set_sample_bit(evsel, bit) \ + __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit) -#define perf_evsel__reset_sample_bit(evsel, bit) \ - __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) +#define evsel__reset_sample_bit(evsel, bit) \ + __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit) -void perf_evsel__set_sample_id(struct evsel *evsel, - bool use_sample_identifier); +void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); -int perf_evsel__set_filter(struct evsel *evsel, const char *filter); -int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter); -int perf_evsel__append_addr_filter(struct evsel *evsel, - const char *filter); +int evsel__set_filter(struct evsel *evsel, const char *filter); +int evsel__append_tp_filter(struct evsel *evsel, const char *filter); +int evsel__append_addr_filter(struct evsel *evsel, const char *filter); int evsel__enable_cpu(struct evsel *evsel, int cpu); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); int evsel__disable_cpu(struct evsel *evsel, int cpu); -int perf_evsel__open_per_cpu(struct evsel *evsel, - struct perf_cpu_map *cpus, - int cpu); -int perf_evsel__open_per_thread(struct evsel *evsel, - struct perf_thread_map *threads); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); +int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void evsel__close(struct evsel *evsel); struct perf_sample; -void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, - const char *name); -u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample, - const char *name); +void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name); +u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name); -static inline char *perf_evsel__strval(struct evsel *evsel, - struct perf_sample *sample, - const char *name) +static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name) { - return perf_evsel__rawptr(evsel, sample, name); + return evsel__rawptr(evsel, sample, name); } struct tep_format_field; u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap); -struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name); +struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); -#define perf_evsel__match(evsel, t, c) \ +#define evsel__match(evsel, t, c) \ (evsel->core.attr.type == PERF_TYPE_##t && \ evsel->core.attr.config == PERF_COUNT_##c) -static inline bool perf_evsel__match2(struct evsel *e1, - struct evsel *e2) +static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) { return (e1->core.attr.type == e2->core.attr.type) && (e1->core.attr.config == e2->core.attr.config); } -#define perf_evsel__cmp(a, b) \ - ((a) && \ - (b) && \ - (a)->core.attr.type == (b)->core.attr.type && \ - (a)->core.attr.config == (b)->core.attr.config) - -int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu, int thread); -int __perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); /** - * perf_evsel__read_on_cpu - Read out the results on a CPU and thread + * evsel__read_on_cpu - Read out the results on a CPU and thread * * @evsel - event selector to read value * @cpu - CPU of interest * @thread - thread of interest */ -static inline int perf_evsel__read_on_cpu(struct evsel *evsel, - int cpu, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) { - return __perf_evsel__read_on_cpu(evsel, cpu, thread, false); + return __evsel__read_on_cpu(evsel, cpu, thread, false); } /** - * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled + * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value * @cpu - CPU of interest * @thread - thread of interest */ -static inline int perf_evsel__read_on_cpu_scaled(struct evsel *evsel, - int cpu, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) { - return __perf_evsel__read_on_cpu(evsel, cpu, thread, true); + return __evsel__read_on_cpu(evsel, cpu, thread, true); } -int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *sample); -int perf_evsel__parse_sample_timestamp(struct evsel *evsel, - union perf_event *event, - u64 *timestamp); +int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, + u64 *timestamp); -static inline struct evsel *perf_evsel__next(struct evsel *evsel) +static inline struct evsel *evsel__next(struct evsel *evsel) { return list_entry(evsel->core.node.next, struct evsel, core.node); } -static inline struct evsel *perf_evsel__prev(struct evsel *evsel) +static inline struct evsel *evsel__prev(struct evsel *evsel) { return list_entry(evsel->core.node.prev, struct evsel, core.node); } /** - * perf_evsel__is_group_leader - Return whether given evsel is a leader event + * evsel__is_group_leader - Return whether given evsel is a leader event * * @evsel - evsel selector to be tested * * Return %true if @evsel is a group leader or a stand-alone event */ -static inline bool perf_evsel__is_group_leader(const struct evsel *evsel) +static inline bool evsel__is_group_leader(const struct evsel *evsel) { return evsel->leader == evsel; } /** - * perf_evsel__is_group_event - Return whether given evsel is a group event + * evsel__is_group_event - Return whether given evsel is a group event * * @evsel - evsel selector to be tested * * Return %true iff event group view is enabled and @evsel is a actual group * leader which has other members in the group */ -static inline bool perf_evsel__is_group_event(struct evsel *evsel) +static inline bool evsel__is_group_event(struct evsel *evsel) { if (!symbol_conf.event_group) return false; - return perf_evsel__is_group_leader(evsel) && evsel->core.nr_members > 1; + return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1; } -bool perf_evsel__is_function_event(struct evsel *evsel); +bool evsel__is_function_event(struct evsel *evsel); -static inline bool perf_evsel__is_bpf_output(struct evsel *evsel) +static inline bool evsel__is_bpf_output(struct evsel *evsel) { - return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); + return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); } -static inline bool perf_evsel__is_clock(struct evsel *evsel) +static inline bool evsel__is_clock(struct evsel *evsel) { - return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || - perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); + return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || + evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } -bool perf_evsel__fallback(struct evsel *evsel, int err, - char *msg, size_t msgsize); -int perf_evsel__open_strerror(struct evsel *evsel, struct target *target, - int err, char *msg, size_t size); +bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize); +int evsel__open_strerror(struct evsel *evsel, struct target *target, + int err, char *msg, size_t size); -static inline int perf_evsel__group_idx(struct evsel *evsel) +static inline int evsel__group_idx(struct evsel *evsel) { return evsel->idx - evsel->leader->idx; } @@ -386,22 +371,37 @@ for ((_evsel) = _leader; \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node)) -static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) +static inline bool evsel__has_branch_callstack(const struct evsel *evsel) { return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } -static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel) { return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; } static inline bool evsel__has_callchain(const struct evsel *evsel) { - return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; + /* + * For reporting purposes, an evsel sample can have a recorded callchain + * or a callchain synthesized from AUX area data. + */ + return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN || + evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN; +} + +static inline bool evsel__has_br_stack(const struct evsel *evsel) +{ + /* + * For reporting purposes, an evsel sample can have a recorded branch + * stack or a branch stack synthesized from AUX area data. + */ + return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK || + evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK; } -struct perf_env *perf_evsel__env(struct evsel *evsel); +struct perf_env *evsel__env(struct evsel *evsel); -int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist); +int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index e026ab67b008..f8938916577c 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -7,7 +7,7 @@ /* * The 'struct perf_evsel_config_term' is used to pass event - * specific configuration data to perf_evsel__config routine. + * specific configuration data to evsel__config routine. * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 3b4842840db0..99aed708bd5a 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -44,22 +44,22 @@ int perf_evsel__fprintf(struct evsel *evsel, if (details->event_group) { struct evsel *pos; - if (!perf_evsel__is_group_leader(evsel)) + if (!evsel__is_group_leader(evsel)) return 0; if (evsel->core.nr_members > 1) printed += fprintf(fp, "%s{", evsel->group_name ?: ""); - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + printed += fprintf(fp, "%s", evsel__name(evsel)); for_each_group_member(pos, evsel) - printed += fprintf(fp, ",%s", perf_evsel__name(pos)); + printed += fprintf(fp, ",%s", evsel__name(pos)); if (evsel->core.nr_members > 1) printed += fprintf(fp, "}"); goto out; } - printed += fprintf(fp, "%s", perf_evsel__name(evsel)); + printed += fprintf(fp, "%s", evsel__name(evsel)); if (details->verbose) { printed += perf_event_attr__fprintf(fp, &evsel->core.attr, diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index fd192ddf93c1..aa631e37ad1e 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -3,7 +3,6 @@ #include <assert.h> #include "expr.h" #include "expr-bison.h" -#define YY_EXTRA_TYPE int #include "expr-flex.h" #ifdef PARSER_DEBUG @@ -11,7 +10,7 @@ extern int expr_debug; #endif /* Caller must make sure id is allocated */ -void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +void expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val) { int idx; @@ -21,20 +20,24 @@ void expr__add_id(struct parse_ctx *ctx, const char *name, double val) ctx->ids[idx].val = val; } -void expr__ctx_init(struct parse_ctx *ctx) +void expr__ctx_init(struct expr_parse_ctx *ctx) { ctx->num_ids = 0; } static int -__expr__parse(double *val, struct parse_ctx *ctx, const char *expr, - int start) +__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, + int start, int runtime) { + struct expr_scanner_ctx scanner_ctx = { + .start_token = start, + .runtime = runtime, + }; YY_BUFFER_STATE buffer; void *scanner; int ret; - ret = expr_lex_init_extra(start, &scanner); + ret = expr_lex_init_extra(&scanner_ctx, &scanner); if (ret) return ret; @@ -52,9 +55,9 @@ __expr__parse(double *val, struct parse_ctx *ctx, const char *expr, return ret; } -int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) +int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; } static bool @@ -72,13 +75,13 @@ already_seen(const char *val, const char *one, const char **other, } int expr__find_other(const char *expr, const char *one, const char ***other, - int *num_other) + int *num_other, int runtime) { int err, i = 0, j = 0; - struct parse_ctx ctx; + struct expr_parse_ctx ctx; expr__ctx_init(&ctx); - err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER, runtime); if (err) return -1; diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 9377538f4097..87d627bb699b 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -5,20 +5,25 @@ #define EXPR_MAX_OTHER 20 #define MAX_PARSE_ID EXPR_MAX_OTHER -struct parse_id { +struct expr_parse_id { const char *name; double val; }; -struct parse_ctx { +struct expr_parse_ctx { int num_ids; - struct parse_id ids[MAX_PARSE_ID]; + struct expr_parse_id ids[MAX_PARSE_ID]; }; -void expr__ctx_init(struct parse_ctx *ctx); -void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); +struct expr_scanner_ctx { + int start_token; + int runtime; +}; + +void expr__ctx_init(struct expr_parse_ctx *ctx); +void expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val); +int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); int expr__find_other(const char *expr, const char *one, const char ***other, - int *num_other); + int *num_other, int runtime); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index eaad29243c23..74b9b59b1aa5 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -35,7 +35,7 @@ static int value(yyscan_t scanner, int base) * Allow @ instead of / to be able to specify pmu/event/ without * conflicts with normal division. */ -static char *normalize(char *str) +static char *normalize(char *str, int runtime) { char *ret = str; char *dst = str; @@ -45,6 +45,19 @@ static char *normalize(char *str) *dst++ = '/'; else if (*str == '\\') *dst++ = *++str; + else if (*str == '?') { + char *paramval; + int i = 0; + int size = asprintf(¶mval, "%d", runtime); + + if (size < 0) + *dst++ = '0'; + else { + while (i < size) + *dst++ = paramval[i++]; + free(paramval); + } + } else *dst++ = *str; str++; @@ -54,16 +67,16 @@ static char *normalize(char *str) return ret; } -static int str(yyscan_t scanner, int token) +static int str(yyscan_t scanner, int token, int runtime) { YYSTYPE *yylval = expr_get_lval(scanner); char *text = expr_get_text(scanner); - yylval->str = normalize(strdup(text)); + yylval->str = normalize(strdup(text), runtime); if (!yylval->str) return EXPR_ERROR; - yylval->str = normalize(yylval->str); + yylval->str = normalize(yylval->str, runtime); return token; } %} @@ -72,17 +85,17 @@ number [0-9]+ sch [-,=] spec \\{sch} -sym [0-9a-zA-Z_\.:@]+ -symbol {spec}*{sym}*{spec}*{sym}* +sym [0-9a-zA-Z_\.:@?]+ +symbol {spec}*{sym}*{spec}*{sym}*{spec}*{sym} %% - { - int start_token; + struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); - start_token = expr_get_extra(yyscanner); + { + int start_token = sctx->start_token; - if (start_token) { - expr_set_extra(NULL, yyscanner); + if (sctx->start_token) { + sctx->start_token = 0; return start_token; } } @@ -93,7 +106,7 @@ if { return IF; } else { return ELSE; } #smt_on { return SMT_ON; } {number} { return value(yyscanner, 10); } -{symbol} { return str(yyscanner, ID); } +{symbol} { return str(yyscanner, ID, sctx->runtime); } "|" { return '|'; } "^" { return '^'; } "&" { return '&'; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 4720cbe79357..cd17486c1c5d 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -15,7 +15,7 @@ %define api.pure full %parse-param { double *final_val } -%parse-param { struct parse_ctx *ctx } +%parse-param { struct expr_parse_ctx *ctx } %parse-param {void *scanner} %lex-param {void* scanner} @@ -39,14 +39,14 @@ %{ static void expr_error(double *final_val __maybe_unused, - struct parse_ctx *ctx __maybe_unused, + struct expr_parse_ctx *ctx __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } -static int lookup_id(struct parse_ctx *ctx, char *id, double *val) +static int lookup_id(struct expr_parse_ctx *ctx, char *id, double *val) { int i; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index acbd046bf95c..0ce47283a8a1 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -525,7 +525,7 @@ static int write_event_desc(struct feat_fd *ff, /* * write event string as passed on cmdline */ - ret = do_write_string(ff, perf_evsel__name(evsel)); + ret = do_write_string(ff, evsel__name(evsel)); if (ret < 0) return ret; /* @@ -783,8 +783,7 @@ static int write_group_desc(struct feat_fd *ff, return ret; evlist__for_each_entry(evlist, evsel) { - if (perf_evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1) { + if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { const char *name = evsel->group_name ?: "{anon_group}"; u32 leader_idx = evsel->idx; u32 nr_members = evsel->core.nr_members; @@ -1395,6 +1394,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused, return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); } +static int write_cpu_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + struct perf_pmu_caps *caps = NULL; + int nr_caps; + int ret; + + if (!cpu_pmu) + return -ENOENT; + + nr_caps = perf_pmu__caps_parse(cpu_pmu); + if (nr_caps < 0) + return nr_caps; + + ret = do_write(ff, &nr_caps, sizeof(nr_caps)); + if (ret < 0) + return ret; + + list_for_each_entry(caps, &cpu_pmu->caps, list) { + ret = do_write_string(ff, caps->name); + if (ret < 0) + return ret; + + ret = do_write_string(ff, caps->value); + if (ret < 0) + return ret; + } + + return ret; +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1809,6 +1840,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp) ff->ph->env.comp_level, ff->ph->env.comp_ratio); } +static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + const char *delimiter = "# cpu pmu capabilities: "; + u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps; + char *str; + + if (!nr_caps) { + fprintf(fp, "# cpu pmu capabilities: not available\n"); + return; + } + + str = ff->ph->env.cpu_pmu_caps; + while (nr_caps--) { + fprintf(fp, "%s%s", delimiter, str); + delimiter = ", "; + str += strlen(str) + 1; + } + + fprintf(fp, "\n"); +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -1854,14 +1906,12 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) session = container_of(ff->ph, struct perf_session, header); evlist__for_each_entry(session->evlist, evsel) { - if (perf_evsel__is_group_leader(evsel) && - evsel->core.nr_members > 1) { - fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", - perf_evsel__name(evsel)); + if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) { + fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel)); nr = evsel->core.nr_members - 1; } else if (nr) { - fprintf(fp, ",%s", perf_evsel__name(evsel)); + fprintf(fp, ",%s", evsel__name(evsel)); if (--nr == 0) fprintf(fp, "}\n"); @@ -2846,6 +2896,60 @@ static int process_compressed(struct feat_fd *ff, return 0; } +static int process_cpu_pmu_caps(struct feat_fd *ff, + void *data __maybe_unused) +{ + char *name, *value; + struct strbuf sb; + u32 nr_caps; + + if (do_read_u32(ff, &nr_caps)) + return -1; + + if (!nr_caps) { + pr_debug("cpu pmu capabilities not available\n"); + return 0; + } + + ff->ph->env.nr_cpu_pmu_caps = nr_caps; + + if (strbuf_init(&sb, 128) < 0) + return -1; + + while (nr_caps--) { + name = do_read_string(ff); + if (!name) + goto error; + + value = do_read_string(ff); + if (!value) + goto free_name; + + if (strbuf_addf(&sb, "%s=%s", name, value) < 0) + goto free_value; + + /* include a NULL character at the end */ + if (strbuf_add(&sb, "", 1) < 0) + goto free_value; + + if (!strcmp(name, "branches")) + ff->ph->env.max_branches = atoi(value); + + free(value); + free(name); + } + ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL); + return 0; + +free_value: + free(value); +free_name: + free(name); +error: + strbuf_release(&sb); + return -1; +} + #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -2903,6 +3007,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), FEAT_OPR(COMPRESSED, compressed, false), + FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 840f95cee349..650bd1c7a99b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -43,6 +43,7 @@ enum { HEADER_BPF_PROG_INFO, HEADER_BPF_BTF, HEADER_COMPRESSED, + HEADER_CPU_PMU_CAPS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 283a69ff6a3d..12b65d00cf65 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter, return fill_callchain_info(al, node, iter->hide_unresolved); } +static bool +hist_entry__fast__sym_diff(struct hist_entry *left, + struct hist_entry *right) +{ + struct symbol *sym_l = left->ms.sym; + struct symbol *sym_r = right->ms.sym; + + if (!sym_l && !sym_r) + return left->ip != right->ip; + + return !!_sort__sym_cmp(sym_l, sym_r); +} + + static int iter_add_next_cumulative_entry(struct hist_entry_iter *iter, struct addr_location *al) @@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, }; int i; struct callchain_cursor cursor; + bool fast = hists__has(he_tmp.hists, sym); callchain_cursor_snapshot(&cursor, &callchain_cursor); @@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, * It's possible that it has cycles or recursive calls. */ for (i = 0; i < iter->curr; i++) { + /* + * For most cases, there are no duplicate entries in callchain. + * The symbols are usually different. Do a quick check for + * symbols first. + */ + if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp)) + continue; + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { /* to avoid calling callback function */ iter->he = NULL; @@ -2637,7 +2660,7 @@ size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) size_t ret = 0; evlist__for_each_entry(evlist, pos) { - ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos)); + ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp); } @@ -2661,7 +2684,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; u64 nr_events = hists->stats.total_period; struct evsel *evsel = hists_to_evsel(hists); - const char *ev_name = perf_evsel__name(evsel); + const char *ev_name = evsel__name(evsel); char buf[512], sample_freq_str[64] = ""; size_t buflen = sizeof(buf); char ref[30] = " show reference callgraph, "; @@ -2672,10 +2695,10 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh nr_events = hists->stats.total_non_filtered_period; } - if (perf_evsel__is_group_event(evsel)) { + if (evsel__is_group_event(evsel)) { struct evsel *pos; - perf_evsel__group_desc(evsel, buf, buflen); + evsel__group_desc(evsel, buf, buflen); ev_name = buf; for_each_group_member(pos, evsel) { diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 34cb380d19a3..af1e78d76228 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -432,7 +432,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq, le64_to_cpu(branch->from), le64_to_cpu(branch->to), btsq->intel_pt_insn.length, - buffer->buffer_nr + 1); + buffer->buffer_nr + 1, true, 0, 0); if (filter && !(filter & btsq->sample_flags)) continue; err = intel_bts_synth_branch_sample(btsq, branch); @@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session) free(bts); } +static bool intel_bts_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, + auxtrace); + + return evsel->core.attr.type == bts->pmu_type; +} + struct intel_bts_synth { struct perf_tool dummy_tool; struct perf_session *session; @@ -816,10 +825,10 @@ static int intel_bts_synth_events(struct intel_bts *bts, bts->branches_id = id; /* * We only use sample types from PERF_SAMPLE_MASK so we can use - * __perf_evsel__sample_size() here. + * __evsel__sample_size() here. */ bts->branches_event_size = sizeof(struct perf_record_sample) + - __perf_evsel__sample_size(attr.sample_type); + __evsel__sample_size(attr.sample_type); } return 0; @@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event, bts->auxtrace.flush_events = intel_bts_flush; bts->auxtrace.free_events = intel_bts_free_events; bts->auxtrace.free = intel_bts_free; + bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace; session->auxtrace = &bts->auxtrace; intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 0ccf10a0bf44..4ce109993e74 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -552,7 +552,7 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, break; default: break; - }; + } if (!(byte & BIT(0))) { if (byte == 0) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 23c8289c2472..f17b1e769ae4 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -33,6 +33,7 @@ #include "tsc.h" #include "intel-pt.h" #include "config.h" +#include "util/perf_api_probe.h" #include "util/synthetic-events.h" #include "time-utils.h" @@ -68,6 +69,10 @@ struct intel_pt { bool est_tsc; bool sync_switch; bool mispred_all; + bool use_thread_stack; + bool callstack; + unsigned int br_stack_sz; + unsigned int br_stack_sz_plus; int have_sched_switch; u32 pmu_type; u64 kernel_start; @@ -124,6 +129,9 @@ struct intel_pt { struct range *time_ranges; unsigned int range_cnt; + + struct ip_callchain *chain; + struct branch_stack *br_stack; }; enum switch_state { @@ -143,8 +151,6 @@ struct intel_pt_queue { const struct intel_pt_state *state; struct ip_callchain *chain; struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; - size_t last_branch_pos; union perf_event *event_buf; bool on_heap; bool stop; @@ -868,6 +874,83 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) pt->tc.time_mult; } +static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt) +{ + size_t sz = sizeof(struct ip_callchain); + + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); + return zalloc(sz); +} + +static int intel_pt_callchain_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN; + } + + pt->chain = intel_pt_alloc_chain(pt); + if (!pt->chain) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_callchain(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__sample_late(thread, sample->cpu, pt->chain, + pt->synth_opts.callchain_sz + 1, sample->ip, + pt->kernel_start); + + sample->callchain = pt->chain; +} + +static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt) +{ + size_t sz = sizeof(struct branch_stack); + + sz += pt->br_stack_sz * sizeof(struct branch_entry); + return zalloc(sz); +} + +static int intel_pt_br_stack_init(struct intel_pt *pt) +{ + struct evsel *evsel; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)) + evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK; + } + + pt->br_stack = intel_pt_alloc_br_stack(pt); + if (!pt->br_stack) + return -ENOMEM; + + return 0; +} + +static void intel_pt_add_br_stack(struct intel_pt *pt, + struct perf_sample *sample) +{ + struct thread *thread = machine__findnew_thread(pt->machine, + sample->pid, + sample->tid); + + thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack, + pt->br_stack_sz, sample->ip, + pt->kernel_start); + + sample->branch_stack = pt->br_stack; +} + static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) { @@ -880,26 +963,15 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, return NULL; if (pt->synth_opts.callchain) { - size_t sz = sizeof(struct ip_callchain); - - /* Add 1 to callchain_sz for callchain context */ - sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); - ptq->chain = zalloc(sz); + ptq->chain = intel_pt_alloc_chain(pt); if (!ptq->chain) goto out_free; } if (pt->synth_opts.last_branch) { - size_t sz = sizeof(struct branch_stack); - - sz += pt->synth_opts.last_branch_sz * - sizeof(struct branch_entry); - ptq->last_branch = zalloc(sz); + ptq->last_branch = intel_pt_alloc_br_stack(pt); if (!ptq->last_branch) goto out_free; - ptq->last_branch_rb = zalloc(sz); - if (!ptq->last_branch_rb) - goto out_free; } ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); @@ -968,7 +1040,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, out_free: zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); return NULL; @@ -984,7 +1055,6 @@ static void intel_pt_free_queue(void *priv) intel_pt_decoder_free(ptq->decoder); zfree(&ptq->event_buf); zfree(&ptq->last_branch); - zfree(&ptq->last_branch_rb); zfree(&ptq->chain); free(ptq); } @@ -1152,58 +1222,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt) return 0; } -static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq) -{ - struct branch_stack *bs_src = ptq->last_branch_rb; - struct branch_stack *bs_dst = ptq->last_branch; - size_t nr = 0; - - bs_dst->nr = bs_src->nr; - - if (!bs_src->nr) - return; - - nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos; - memcpy(&bs_dst->entries[0], - &bs_src->entries[ptq->last_branch_pos], - sizeof(struct branch_entry) * nr); - - if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) { - memcpy(&bs_dst->entries[nr], - &bs_src->entries[0], - sizeof(struct branch_entry) * ptq->last_branch_pos); - } -} - -static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq) -{ - ptq->last_branch_pos = 0; - ptq->last_branch_rb->nr = 0; -} - -static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq) -{ - const struct intel_pt_state *state = ptq->state; - struct branch_stack *bs = ptq->last_branch_rb; - struct branch_entry *be; - - if (!ptq->last_branch_pos) - ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz; - - ptq->last_branch_pos -= 1; - - be = &bs->entries[ptq->last_branch_pos]; - be->from = state->from_ip; - be->to = state->to_ip; - be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX); - be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX); - /* No support for mispredict */ - be->flags.mispred = ptq->pt->mispred_all; - - if (bs->nr < ptq->pt->synth_opts.last_branch_sz) - bs->nr += 1; -} - static inline bool intel_pt_skip_event(struct intel_pt *pt) { return pt->synth_opts.initial_skip && @@ -1271,9 +1289,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt, return intel_pt_inject_event(event, sample, type); } -static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, - union perf_event *event, - struct perf_sample *sample, u64 type) +static int intel_pt_deliver_synth_event(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample, u64 type) { int ret; @@ -1333,8 +1351,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt; } - return intel_pt_deliver_synth_b_event(pt, event, &sample, - pt->branches_sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, + pt->branches_sample_type); } static void intel_pt_prep_sample(struct intel_pt *pt, @@ -1352,27 +1370,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt, } if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch, + pt->br_stack_sz); sample->branch_stack = ptq->last_branch; } } -static inline int intel_pt_deliver_synth_event(struct intel_pt *pt, - struct intel_pt_queue *ptq, - union perf_event *event, - struct perf_sample *sample, - u64 type) -{ - int ret; - - ret = intel_pt_deliver_synth_b_event(pt, event, sample, type); - - if (pt->synth_opts.last_branch) - intel_pt_reset_last_branch_rb(ptq); - - return ret; -} - static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; @@ -1397,7 +1400,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) ptq->last_insn_cnt = ptq->state->tot_insn_cnt; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->instructions_sample_type); } @@ -1415,7 +1418,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) sample.id = ptq->pt->transactions_id; sample.stream_id = ptq->pt->transactions_id; - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->transactions_sample_type); } @@ -1456,7 +1459,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->ptwrites_sample_type); } @@ -1486,7 +1489,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1511,7 +1514,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1536,7 +1539,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1561,7 +1564,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1586,7 +1589,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) sample.raw_size = perf_synth__raw_size(raw); sample.raw_data = perf_synth__raw_data(&raw); - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, + return intel_pt_deliver_synth_event(pt, event, &sample, pt->pwr_events_sample_type); } @@ -1680,15 +1683,14 @@ static u64 intel_pt_lbr_flags(u64 info) union { struct branch_flags flags; u64 result; - } u = { - .flags = { - .mispred = !!(info & LBR_INFO_MISPRED), - .predicted = !(info & LBR_INFO_MISPRED), - .in_tx = !!(info & LBR_INFO_IN_TX), - .abort = !!(info & LBR_INFO_ABORT), - .cycles = info & LBR_INFO_CYCLES, - } - }; + } u; + + u.result = 0; + u.flags.mispred = !!(info & LBR_INFO_MISPRED); + u.flags.predicted = !(info & LBR_INFO_MISPRED); + u.flags.in_tx = !!(info & LBR_INFO_IN_TX); + u.flags.abort = !!(info & LBR_INFO_ABORT); + u.flags.cycles = info & LBR_INFO_CYCLES; return u.result; } @@ -1807,7 +1809,9 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) intel_pt_add_lbrs(&br.br_stack, items); sample.branch_stack = &br.br_stack; } else if (pt->synth_opts.last_branch) { - intel_pt_copy_last_branch_rb(ptq); + thread_stack__br_sample(ptq->thread, ptq->cpu, + ptq->last_branch, + pt->br_stack_sz); sample.branch_stack = ptq->last_branch; } else { br.br_stack.nr = 0; @@ -1842,7 +1846,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) sample.transaction = txn; } - return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type); + return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, @@ -1992,12 +1996,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (!(state->type & INTEL_PT_BRANCH)) return 0; - if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) - thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip, - state->to_ip, ptq->insn_len, - state->trace_nr); - else + if (pt->use_thread_stack) { + thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, + state->from_ip, state->to_ip, ptq->insn_len, + state->trace_nr, pt->callstack, + pt->br_stack_sz_plus, + pt->mispred_all); + } else { thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); + } if (pt->sample_branches) { err = intel_pt_synth_branch_sample(ptq); @@ -2005,9 +2012,6 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) return err; } - if (pt->synth_opts.last_branch) - intel_pt_update_last_branch_rb(ptq); - if (!ptq->sync_switch) return 0; @@ -2484,7 +2488,7 @@ static int intel_pt_process_switch(struct intel_pt *pt, if (evsel != pt->switch_evsel) return 0; - tid = perf_evsel__intval(evsel, sample, "next_pid"); + tid = evsel__intval(evsel, sample, "next_pid"); cpu = sample->cpu; intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", @@ -2639,6 +2643,13 @@ static int intel_pt_process_event(struct perf_session *session, if (err) return err; + if (event->header.type == PERF_RECORD_SAMPLE) { + if (pt->synth_opts.add_callchain && !sample->callchain) + intel_pt_add_callchain(pt, sample); + if (pt->synth_opts.add_last_branch && !sample->branch_stack) + intel_pt_add_br_stack(pt, sample); + } + if (event->header.type == PERF_RECORD_AUX && (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && pt->synth_opts.errors) { @@ -2710,11 +2721,21 @@ static void intel_pt_free(struct perf_session *session) session->auxtrace = NULL; thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); + zfree(&pt->chain); zfree(&pt->filter); zfree(&pt->time_ranges); free(pt); } +static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, + struct evsel *evsel) +{ + struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, + auxtrace); + + return evsel->core.attr.type == pt->pmu_type; +} + static int intel_pt_process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -3016,7 +3037,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) struct evsel *evsel; evlist__for_each_entry_reverse(evlist, evsel) { - const char *name = perf_evsel__name(evsel); + const char *name = evsel__name(evsel); if (!strcmp(name, "sched:sched_switch")) return evsel; @@ -3310,6 +3331,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->auxtrace.flush_events = intel_pt_flush; pt->auxtrace.free_events = intel_pt_free_events; pt->auxtrace.free = intel_pt_free; + pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; session->auxtrace = &pt->auxtrace; if (dump_trace) @@ -3338,6 +3360,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, !session->itrace_synth_opts->inject) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; } pt->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; @@ -3370,14 +3393,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->branches_filter |= PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_BEGIN; - if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { + if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && + !symbol_conf.use_callchain) { symbol_conf.use_callchain = true; if (callchain_register_param(&callchain_param) < 0) { symbol_conf.use_callchain = false; pt->synth_opts.callchain = false; + pt->synth_opts.add_callchain = false; } } + if (pt->synth_opts.add_callchain) { + err = intel_pt_callchain_init(pt); + if (err) + goto err_delete_thread; + } + + if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { + pt->br_stack_sz = pt->synth_opts.last_branch_sz; + pt->br_stack_sz_plus = pt->br_stack_sz; + } + + if (pt->synth_opts.add_last_branch) { + err = intel_pt_br_stack_init(pt); + if (err) + goto err_delete_thread; + /* + * Additional branch stack size to cater for tracing from the + * actual sample ip to where the sample time is recorded. + * Measured at about 200 branches, but generously set to 1024. + * If kernel space is not being traced, then add just 1 for the + * branch to kernel space. + */ + if (intel_pt_tracing_kernel(pt)) + pt->br_stack_sz_plus += 1024; + else + pt->br_stack_sz_plus += 1; + } + + pt->use_thread_stack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack || + pt->synth_opts.last_branch || + pt->synth_opts.add_last_branch; + + pt->callstack = pt->synth_opts.callchain || + pt->synth_opts.add_callchain || + pt->synth_opts.thread_stack; + err = intel_pt_synth_events(pt, session); if (err) goto err_delete_thread; @@ -3400,6 +3463,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, return 0; err_delete_thread: + zfree(&pt->chain); thread__zput(pt->unknown_thread); err_free_queues: intel_pt_log_disable(); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 97142e9671be..8ed2135893bb 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -736,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static int is_bpf_image(const char *name) +{ + return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) || + strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1); +} + static int machine__process_ksymbol_register(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) @@ -759,6 +765,12 @@ static int machine__process_ksymbol_register(struct machine *machine, map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; maps__insert(&machine->kmaps, map); + dso__set_loaded(dso); + + if (is_bpf_image(event->ksymbol.name)) { + dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE; + dso__set_long_name(dso, "", false); + } } sym = symbol__new(map->map_ip(map, map->start), @@ -2178,6 +2190,303 @@ static int remove_loops(struct branch_entry *l, int nr, return nr; } +static int lbr_callchain_add_kernel_ip(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + u64 branch_from, + bool callee, int end) +{ + struct ip_callchain *chain = sample->callchain; + u8 cpumode = PERF_RECORD_MISC_USER; + int err, i; + + if (callee) { + for (i = 0; i < end + 1; i++) { + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, chain->ips[i], + false, NULL, NULL, branch_from); + if (err) + return err; + } + return 0; + } + + for (i = end; i >= 0; i--) { + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, chain->ips[i], + false, NULL, NULL, branch_from); + if (err) + return err; + } + + return 0; +} + +static void save_lbr_cursor_node(struct thread *thread, + struct callchain_cursor *cursor, + int idx) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + + if (!lbr_stitch) + return; + + if (cursor->pos == cursor->nr) { + lbr_stitch->prev_lbr_cursor[idx].valid = false; + return; + } + + if (!cursor->curr) + cursor->curr = cursor->first; + else + cursor->curr = cursor->curr->next; + memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr, + sizeof(struct callchain_cursor_node)); + + lbr_stitch->prev_lbr_cursor[idx].valid = true; + cursor->pos++; +} + +static int lbr_callchain_add_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + u64 *branch_from, + bool callee) +{ + struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); + u8 cpumode = PERF_RECORD_MISC_USER; + int lbr_nr = lbr_stack->nr; + struct branch_flags *flags; + int err, i; + u64 ip; + + /* + * The curr and pos are not used in writing session. They are cleared + * in callchain_cursor_commit() when the writing session is closed. + * Using curr and pos to track the current cursor node. + */ + if (thread->lbr_stitch) { + cursor->curr = NULL; + cursor->pos = cursor->nr; + if (cursor->nr) { + cursor->curr = cursor->first; + for (i = 0; i < (int)(cursor->nr - 1); i++) + cursor->curr = cursor->curr->next; + } + } + + if (callee) { + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + + /* + * The number of cursor node increases. + * Move the current cursor node. + * But does not need to save current cursor node for entry 0. + * It's impossible to stitch the whole LBRs of previous sample. + */ + if (thread->lbr_stitch && (cursor->pos != cursor->nr)) { + if (!cursor->curr) + cursor->curr = cursor->first; + else + cursor->curr = cursor->curr->next; + cursor->pos++; + } + + /* Add LBR ip from entries.from one by one. */ + for (i = 0; i < lbr_nr; i++) { + ip = entries[i].from; + flags = &entries[i].flags; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + save_lbr_cursor_node(thread, cursor, i); + } + return 0; + } + + /* Add LBR ip from entries.from one by one. */ + for (i = lbr_nr - 1; i >= 0; i--) { + ip = entries[i].from; + flags = &entries[i].flags; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + save_lbr_cursor_node(thread, cursor, i); + } + + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + + return 0; +} + +static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread, + struct callchain_cursor *cursor) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct callchain_cursor_node *cnode; + struct stitch_list *stitch_node; + int err; + + list_for_each_entry(stitch_node, &lbr_stitch->lists, node) { + cnode = &stitch_node->cursor; + + err = callchain_cursor_append(cursor, cnode->ip, + &cnode->ms, + cnode->branch, + &cnode->branch_flags, + cnode->nr_loop_iter, + cnode->iter_cycles, + cnode->branch_from, + cnode->srcline); + if (err) + return err; + } + return 0; +} + +static struct stitch_list *get_stitch_node(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *stitch_node; + + if (!list_empty(&lbr_stitch->free_lists)) { + stitch_node = list_first_entry(&lbr_stitch->free_lists, + struct stitch_list, node); + list_del(&stitch_node->node); + + return stitch_node; + } + + return malloc(sizeof(struct stitch_list)); +} + +static bool has_stitched_lbr(struct thread *thread, + struct perf_sample *cur, + struct perf_sample *prev, + unsigned int max_lbr, + bool callee) +{ + struct branch_stack *cur_stack = cur->branch_stack; + struct branch_entry *cur_entries = perf_sample__branch_entries(cur); + struct branch_stack *prev_stack = prev->branch_stack; + struct branch_entry *prev_entries = perf_sample__branch_entries(prev); + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + int i, j, nr_identical_branches = 0; + struct stitch_list *stitch_node; + u64 cur_base, distance; + + if (!cur_stack || !prev_stack) + return false; + + /* Find the physical index of the base-of-stack for current sample. */ + cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1; + + distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) : + (max_lbr + prev_stack->hw_idx - cur_base); + /* Previous sample has shorter stack. Nothing can be stitched. */ + if (distance + 1 > prev_stack->nr) + return false; + + /* + * Check if there are identical LBRs between two samples. + * Identicall LBRs must have same from, to and flags values. Also, + * they have to be saved in the same LBR registers (same physical + * index). + * + * Starts from the base-of-stack of current sample. + */ + for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) { + if ((prev_entries[i].from != cur_entries[j].from) || + (prev_entries[i].to != cur_entries[j].to) || + (prev_entries[i].flags.value != cur_entries[j].flags.value)) + break; + nr_identical_branches++; + } + + if (!nr_identical_branches) + return false; + + /* + * Save the LBRs between the base-of-stack of previous sample + * and the base-of-stack of current sample into lbr_stitch->lists. + * These LBRs will be stitched later. + */ + for (i = prev_stack->nr - 1; i > (int)distance; i--) { + + if (!lbr_stitch->prev_lbr_cursor[i].valid) + continue; + + stitch_node = get_stitch_node(thread); + if (!stitch_node) + return false; + + memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i], + sizeof(struct callchain_cursor_node)); + + if (callee) + list_add(&stitch_node->node, &lbr_stitch->lists); + else + list_add_tail(&stitch_node->node, &lbr_stitch->lists); + } + + return true; +} + +static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr) +{ + if (thread->lbr_stitch) + return true; + + thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch)); + if (!thread->lbr_stitch) + goto err; + + thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node)); + if (!thread->lbr_stitch->prev_lbr_cursor) + goto free_lbr_stitch; + + INIT_LIST_HEAD(&thread->lbr_stitch->lists); + INIT_LIST_HEAD(&thread->lbr_stitch->free_lists); + + return true; + +free_lbr_stitch: + zfree(&thread->lbr_stitch); +err: + pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n"); + thread->lbr_stitch_enable = false; + return false; +} + /* * Recolve LBR callstack chain sample * Return: @@ -2190,12 +2499,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread, struct perf_sample *sample, struct symbol **parent, struct addr_location *root_al, - int max_stack) + int max_stack, + unsigned int max_lbr) { + bool callee = (callchain_param.order == ORDER_CALLEE); struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr), i; - u8 cpumode = PERF_RECORD_MISC_USER; - u64 ip, branch_from = 0; + struct lbr_stitch *lbr_stitch; + bool stitched_lbr = false; + u64 branch_from = 0; + int err; for (i = 0; i < chain_nr; i++) { if (chain->ips[i] == PERF_CONTEXT_USER) @@ -2203,71 +2516,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread, } /* LBR only affects the user callchain */ - if (i != chain_nr) { - struct branch_stack *lbr_stack = sample->branch_stack; - struct branch_entry *entries = perf_sample__branch_entries(sample); - int lbr_nr = lbr_stack->nr, j, k; - bool branch; - struct branch_flags *flags; - /* - * LBR callstack can only get user call chain. - * The mix_chain_nr is kernel call chain - * number plus LBR user call chain number. - * i is kernel call chain number, - * 1 is PERF_CONTEXT_USER, - * lbr_nr + 1 is the user call chain number. - * For details, please refer to the comments - * in callchain__printf - */ - int mix_chain_nr = i + 1 + lbr_nr + 1; + if (i == chain_nr) + return 0; - for (j = 0; j < mix_chain_nr; j++) { - int err; - branch = false; - flags = NULL; + if (thread->lbr_stitch_enable && !sample->no_hw_idx && + (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) { + lbr_stitch = thread->lbr_stitch; - if (callchain_param.order == ORDER_CALLEE) { - if (j < i + 1) - ip = chain->ips[j]; - else if (j > i + 1) { - k = j - i - 2; - ip = entries[k].from; - branch = true; - flags = &entries[k].flags; - } else { - ip = entries[0].to; - branch = true; - flags = &entries[0].flags; - branch_from = entries[0].from; - } - } else { - if (j < lbr_nr) { - k = lbr_nr - j - 1; - ip = entries[k].from; - branch = true; - flags = &entries[k].flags; - } - else if (j > lbr_nr) - ip = chain->ips[i + 1 - (j - lbr_nr)]; - else { - ip = entries[0].to; - branch = true; - flags = &entries[0].flags; - branch_from = entries[0].from; - } - } + stitched_lbr = has_stitched_lbr(thread, sample, + &lbr_stitch->prev_sample, + max_lbr, callee); - err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, ip, - branch, flags, NULL, - branch_from); + if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) { + list_replace_init(&lbr_stitch->lists, + &lbr_stitch->free_lists); + } + memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample)); + } + + if (callee) { + /* Add kernel ip */ + err = lbr_callchain_add_kernel_ip(thread, cursor, sample, + parent, root_al, branch_from, + true, i); + if (err) + goto error; + + err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, + root_al, &branch_from, true); + if (err) + goto error; + + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); if (err) - return (err < 0) ? err : 0; + goto error; } - return 1; + + } else { + if (stitched_lbr) { + err = lbr_callchain_add_stitched_lbr_ip(thread, cursor); + if (err) + goto error; + } + err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent, + root_al, &branch_from, false); + if (err) + goto error; + + /* Add kernel ip */ + err = lbr_callchain_add_kernel_ip(thread, cursor, sample, + parent, root_al, branch_from, + false, i); + if (err) + goto error; } + return 1; - return 0; +error: + return (err < 0) ? err : 0; } static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, @@ -2311,9 +2618,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain) chain_nr = chain->nr; - if (perf_evsel__has_branch_callstack(evsel)) { + if (evsel__has_branch_callstack(evsel)) { + struct perf_env *env = evsel__env(evsel); + err = resolve_lbr_callchain_sample(thread, cursor, sample, parent, - root_al, max_stack); + root_al, max_stack, + !env ? 0 : env->max_branches); if (err) return (err < 0) ? err : 0; } diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c index 797d86a1ab09..c84f5841c7ab 100644 --- a/tools/perf/util/mem2node.c +++ b/tools/perf/util/mem2node.c @@ -1,5 +1,6 @@ #include <errno.h> #include <inttypes.h> +#include <asm/bug.h> #include <linux/bitmap.h> #include <linux/kernel.h> #include <linux/zalloc.h> @@ -95,7 +96,7 @@ int mem2node__init(struct mem2node *map, struct perf_env *env) /* Cut unused entries, due to merging. */ tmp_entries = realloc(entries, sizeof(*entries) * j); - if (tmp_entries) + if (tmp_entries || WARN_ON_ONCE(j == 0)) entries = tmp_entries; for (i = 0; i < j; i++) { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 926449a7cdbf..b071df373f8b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -90,6 +90,7 @@ struct egroup { const char *metric_name; const char *metric_expr; const char *metric_unit; + int runtime; }; static struct evsel *find_evsel_group(struct evlist *perf_evlist, @@ -202,6 +203,7 @@ static int metricgroup__setup_events(struct list_head *groups, expr->metric_name = eg->metric_name; expr->metric_unit = eg->metric_unit; expr->metric_events = metric_events; + expr->runtime = eg->runtime; list_add(&expr->nd, &me->head); } @@ -485,6 +487,45 @@ static bool metricgroup__has_constraint(struct pmu_event *pe) return false; } +int __weak arch_get_runtimeparam(void) +{ + return 1; +} + +static int __metricgroup__add_metric(struct strbuf *events, + struct list_head *group_list, struct pmu_event *pe, int runtime) +{ + + const char **ids; + int idnum; + struct egroup *eg; + + if (expr__find_other(pe->metric_expr, NULL, &ids, &idnum, runtime) < 0) + return -EINVAL; + + if (events->len > 0) + strbuf_addf(events, ","); + + if (metricgroup__has_constraint(pe)) + metricgroup__add_metric_non_group(events, ids, idnum); + else + metricgroup__add_metric_weak_group(events, ids, idnum); + + eg = malloc(sizeof(*eg)); + if (!eg) + return -ENOMEM; + + eg->ids = ids; + eg->idnum = idnum; + eg->metric_name = pe->metric_name; + eg->metric_expr = pe->metric_expr; + eg->metric_unit = pe->unit; + eg->runtime = runtime; + list_add_tail(&eg->nd, group_list); + + return 0; +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { @@ -504,35 +545,26 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, continue; if (match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric)) { - const char **ids; - int idnum; - struct egroup *eg; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); - if (expr__find_other(pe->metric_expr, - NULL, &ids, &idnum) < 0) - continue; - if (events->len > 0) - strbuf_addf(events, ","); + if (!strstr(pe->metric_expr, "?")) { + ret = __metricgroup__add_metric(events, group_list, pe, 1); + } else { + int j, count; - if (metricgroup__has_constraint(pe)) - metricgroup__add_metric_non_group(events, ids, idnum); - else - metricgroup__add_metric_weak_group(events, ids, idnum); + count = arch_get_runtimeparam(); - eg = malloc(sizeof(struct egroup)); - if (!eg) { - ret = -ENOMEM; - break; + /* This loop is added to create multiple + * events depend on count value and add + * those events to group_list. + */ + + for (j = 0; j < count; j++) + ret = __metricgroup__add_metric(events, group_list, pe, j); } - eg->ids = ids; - eg->idnum = idnum; - eg->metric_name = pe->metric_name; - eg->metric_expr = pe->metric_expr; - eg->metric_unit = pe->unit; - list_add_tail(&eg->nd, group_list); - ret = 0; + if (ret == -ENOMEM) + break; } } return ret; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 475c7f912864..6b09eb30b4ec 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -22,6 +22,7 @@ struct metric_expr { const char *metric_name; const char *metric_unit; struct evsel **metric_events; + int runtime; }; struct metric_event *metricgroup__lookup(struct rblist *metric_events, @@ -34,4 +35,5 @@ int metricgroup__parse_groups(const struct option *opt, void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); +int arch_get_runtimeparam(void); #endif diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 359db2b1fcef..48c8f609441b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -314,7 +314,7 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, case OE_FLUSH__NONE: default: break; - }; + } pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n", str[how], oe->nr_events); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 10107747b361..b7a0518d607d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -461,7 +461,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, cache_op = parse_aliases(str, perf_evsel__hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); if (cache_op >= 0) { - if (!perf_evsel__is_cache_op_valid(cache_type, cache_op)) + if (!evsel__is_cache_op_valid(cache_type, cache_op)) return -EINVAL; continue; } @@ -1482,6 +1482,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, list_for_each_entry_safe(pos, tmp, &config_terms, list) { list_del_init(&pos->list); + zfree(&pos->val.str); free(pos); } return -EINVAL; @@ -1870,7 +1871,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->precise_max = mod.precise_max; evsel->weak_group = mod.weak; - if (perf_evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) evsel->core.attr.pinned = mod.pinned; } @@ -2190,6 +2191,29 @@ int parse_events_option(const struct option *opt, const char *str, return ret; } +int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset) +{ + struct evlist **evlistp = opt->value; + int ret; + + if (*evlistp == NULL) { + *evlistp = evlist__new(); + + if (*evlistp == NULL) { + fprintf(stderr, "Not enough memory to create evlist\n"); + return -1; + } + } + + ret = parse_events_option(opt, str, unset); + if (ret) { + evlist__delete(*evlistp); + *evlistp = NULL; + } + + return ret; +} + static int foreach_evsel_in_last_glob(struct evlist *evlist, int (*func)(struct evsel *evsel, @@ -2237,7 +2261,7 @@ static int set_filter(struct evsel *evsel, const void *arg) } if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) { - if (perf_evsel__append_tp_filter(evsel, str) < 0) { + if (evsel__append_tp_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2262,7 +2286,7 @@ static int set_filter(struct evsel *evsel, const void *arg) return -1; } - if (perf_evsel__append_addr_filter(evsel, str) < 0) { + if (evsel__append_addr_filter(evsel, str) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2293,7 +2317,7 @@ static int add_exclude_perf_filter(struct evsel *evsel, snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); - if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) { + if (evsel__append_tp_filter(evsel, new_filter) < 0) { fprintf(stderr, "not enough memory to hold filter string\n"); return -1; @@ -2603,12 +2627,11 @@ restart: for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ - if (!perf_evsel__is_cache_op_valid(type, op)) + if (!evsel__is_cache_op_valid(type, op)) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - __perf_evsel__hw_cache_type_op_res_name(type, op, i, - name, sizeof(name)); + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 27596cbd0ba0..6ead9661238c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -31,6 +31,7 @@ bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); int parse_events_option(const struct option *opt, const char *str, int unset); +int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset); int parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error); int parse_events_terms(struct list_head *terms, const char *str); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index baa48f28d57d..c589fc42f058 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -286,6 +286,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } +r{num_raw_hex} { return raw(yyscanner); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 94f8bcd83582..c4ca932d092d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -44,7 +44,7 @@ static void free_list_evsel(struct list_head* list_evsel) list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) { list_del_init(&evsel->core.node); - perf_evsel__delete(evsel); + evsel__delete(evsel); } free(list_evsel); } @@ -326,6 +326,7 @@ PE_NAME opt_pmu_config } parse_events_terms__delete($2); parse_events_terms__delete(orig_terms); + free(pattern); free($1); $$ = list; #undef CLEANUP_YYABORT @@ -706,6 +707,15 @@ event_term } event_term: +PE_RAW +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG, + NULL, $1, false, &@1, NULL)); + $$ = term; +} +| PE_NAME '=' PE_NAME { struct parse_events_term *term; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c new file mode 100644 index 000000000000..1337965673d7 --- /dev/null +++ b/tools/perf/util/perf_api_probe.c @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "perf-sys.h" +#include "util/cloexec.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/parse-events.h" +#include "util/perf_api_probe.h" +#include <perf/cpumap.h> +#include <errno.h> + +typedef void (*setup_probe_fn_t)(struct evsel *evsel); + +static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +{ + struct evlist *evlist; + struct evsel *evsel; + unsigned long flags = perf_event_open_cloexec_flag(); + int err = -EAGAIN, fd; + static pid_t pid = -1; + + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + if (parse_events(evlist, str, NULL)) + goto out_delete; + + evsel = evlist__first(evlist); + + while (1) { + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (pid == -1 && errno == EACCES) { + pid = 0; + continue; + } + goto out_delete; + } + break; + } + close(fd); + + fn(evsel); + + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + if (fd < 0) { + if (errno == EINVAL) + err = -EINVAL; + goto out_delete; + } + close(fd); + err = 0; + +out_delete: + evlist__delete(evlist); + return err; +} + +static bool perf_probe_api(setup_probe_fn_t fn) +{ + const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_cpu_map *cpus; + int cpu, ret, i = 0; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + do { + ret = perf_do_probe_api(fn, cpu, try[i++]); + if (!ret) + return true; + } while (ret == -EAGAIN && try[i]); + + return false; +} + +static void perf_probe_sample_identifier(struct evsel *evsel) +{ + evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; +} + +static void perf_probe_comm_exec(struct evsel *evsel) +{ + evsel->core.attr.comm_exec = 1; +} + +static void perf_probe_context_switch(struct evsel *evsel) +{ + evsel->core.attr.context_switch = 1; +} + +bool perf_can_sample_identifier(void) +{ + return perf_probe_api(perf_probe_sample_identifier); +} + +bool perf_can_comm_exec(void) +{ + return perf_probe_api(perf_probe_comm_exec); +} + +bool perf_can_record_switch_events(void) +{ + return perf_probe_api(perf_probe_context_switch); +} + +bool perf_can_record_cpu_wide(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .exclude_kernel = 1, + }; + struct perf_cpu_map *cpus; + int cpu, fd; + + cpus = perf_cpu_map__new(NULL); + if (!cpus) + return false; + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); + + fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + if (fd < 0) + return false; + close(fd); + + return true; +} + +/* + * Architectures are expected to know if AUX area sampling is supported by the + * hardware. Here we check for kernel support. + */ +bool perf_can_aux_sample(void) +{ + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .exclude_kernel = 1, + /* + * Non-zero value causes the kernel to calculate the effective + * attribute size up to that byte. + */ + .aux_sample_size = 1, + }; + int fd; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + /* + * If the kernel attribute is big enough to contain aux_sample_size + * then we assume that it is supported. We are relying on the kernel to + * validate the attribute size before anything else that could be wrong. + */ + if (fd < 0 && errno == E2BIG) + return false; + if (fd >= 0) + close(fd); + + return true; +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h new file mode 100644 index 000000000000..706c3c6426e2 --- /dev/null +++ b/tools/perf/util/perf_api_probe.h @@ -0,0 +1,14 @@ + +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_API_PROBE_H +#define __PERF_API_PROBE_H + +#include <stdbool.h> + +bool perf_can_aux_sample(void); +bool perf_can_comm_exec(void); +bool perf_can_record_cpu_wide(void); +bool perf_can_record_switch_events(void); +bool perf_can_sample_identifier(void); + +#endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ef6a63f3d386..92bd7fafcce6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,6 +18,7 @@ #include <regex.h> #include <perf/cpumap.h> #include "debug.h" +#include "evsel.h" #include "pmu.h" #include "parse-events.h" #include "header.h" @@ -849,6 +850,7 @@ static struct perf_pmu *pmu_lookup(const char *name) INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); + INIT_LIST_HEAD(&pmu->caps); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); @@ -869,6 +871,17 @@ static struct perf_pmu *pmu_find(const char *name) return NULL; } +struct perf_pmu *perf_pmu__find_by_type(unsigned int type) +{ + struct perf_pmu *pmu; + + list_for_each_entry(pmu, &pmus, list) + if (pmu->type == type) + return pmu; + + return NULL; +} + struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) { /* @@ -884,6 +897,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) return NULL; } +struct perf_pmu *evsel__find_pmu(struct evsel *evsel) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu->type == evsel->core.attr.type) + break; + } + + return pmu; +} + +bool evsel__is_aux_event(struct evsel *evsel) +{ + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + return pmu && pmu->auxtrace; +} + struct perf_pmu *perf_pmu__find(const char *name) { struct perf_pmu *pmu; @@ -1574,3 +1606,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, va_end(args); return ret; } + +static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) +{ + struct perf_pmu_caps *caps = zalloc(sizeof(*caps)); + + if (!caps) + return -ENOMEM; + + caps->name = strdup(name); + if (!caps->name) + goto free_caps; + caps->value = strndup(value, strlen(value) - 1); + if (!caps->value) + goto free_name; + list_add_tail(&caps->list, list); + return 0; + +free_name: + zfree(caps->name); +free_caps: + free(caps); + + return -ENOMEM; +} + +/* + * Reading/parsing the given pmu capabilities, which should be located at: + * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. + * Return the number of capabilities + */ +int perf_pmu__caps_parse(struct perf_pmu *pmu) +{ + struct stat st; + char caps_path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + DIR *caps_dir; + struct dirent *evt_ent; + int nr_caps = 0; + + if (!sysfs) + return -1; + + snprintf(caps_path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); + + if (stat(caps_path, &st) < 0) + return 0; /* no error if caps does not exist */ + + caps_dir = opendir(caps_path); + if (!caps_dir) + return -EINVAL; + + while ((evt_ent = readdir(caps_dir)) != NULL) { + char path[PATH_MAX + NAME_MAX + 1]; + char *name = evt_ent->d_name; + char value[128]; + FILE *file; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + snprintf(path, sizeof(path), "%s/%s", caps_path, name); + + file = fopen(path, "r"); + if (!file) + continue; + + if (!fgets(value, sizeof(value), file) || + (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) { + fclose(file); + continue; + } + + nr_caps++; + fclose(file); + } + + closedir(caps_dir); + + return nr_caps; +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 5fb3f16828df..cb6fbec50313 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -22,6 +22,12 @@ enum { struct perf_event_attr; +struct perf_pmu_caps { + char *name; + char *value; + struct list_head list; +}; + struct perf_pmu { char *name; __u32 type; @@ -33,6 +39,7 @@ struct perf_pmu { struct perf_cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ }; @@ -65,6 +72,7 @@ struct perf_pmu_alias { }; struct perf_pmu *perf_pmu__find(const char *name); +struct perf_pmu *perf_pmu__find_by_type(unsigned int type); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct list_head *head_terms, struct parse_events_error *error); @@ -107,4 +115,6 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +int perf_pmu__caps_parse(struct perf_pmu *pmu); + #endif /* __PMU_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 83212c65848b..75a9b1d62bba 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -801,7 +801,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel, static void pyrf_evsel__delete(struct pyrf_evsel *pevsel) { - perf_evsel__exit(&pevsel->evsel); + evsel__exit(&pevsel->evsel); Py_TYPE(pevsel)->tp_free((PyObject*)pevsel); } @@ -1044,7 +1044,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, pevent->evsel = evsel; - err = perf_evsel__parse_sample(evsel, event, &pevent->sample); + err = evsel__parse_sample(evsel, event, &pevent->sample); /* Consume the even only after we parsed it out. */ perf_mmap__consume(&md->core); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 7def66168503..a4cc11592f6b 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -10,161 +10,64 @@ #include <subcmd/parse-options.h> #include <perf/cpumap.h> #include "cloexec.h" +#include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" -typedef void (*setup_probe_fn_t)(struct evsel *evsel); - -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +/* + * evsel__config_leader_sampling() uses special rules for leader sampling. + * However, if the leader is an AUX area event, then assume the event to sample + * is the next event. + */ +static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist) { - struct evlist *evlist; - struct evsel *evsel; - unsigned long flags = perf_event_open_cloexec_flag(); - int err = -EAGAIN, fd; - static pid_t pid = -1; - - evlist = evlist__new(); - if (!evlist) - return -ENOMEM; - - if (parse_events(evlist, str, NULL)) - goto out_delete; - - evsel = evlist__first(evlist); + struct evsel *leader = evsel->leader; - while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (pid == -1 && errno == EACCES) { - pid = 0; - continue; - } - goto out_delete; + if (evsel__is_aux_event(leader)) { + evlist__for_each_entry(evlist, evsel) { + if (evsel->leader == leader && evsel != evsel->leader) + return evsel; } - break; - } - close(fd); - - fn(evsel); - - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); - if (fd < 0) { - if (errno == EINVAL) - err = -EINVAL; - goto out_delete; } - close(fd); - err = 0; - -out_delete: - evlist__delete(evlist); - return err; -} - -static bool perf_probe_api(setup_probe_fn_t fn) -{ - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; - struct perf_cpu_map *cpus; - int cpu, ret, i = 0; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); - - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - return false; + return leader; } -static void perf_probe_sample_identifier(struct evsel *evsel) +static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist) { - evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER; -} - -static void perf_probe_comm_exec(struct evsel *evsel) -{ - evsel->core.attr.comm_exec = 1; -} - -static void perf_probe_context_switch(struct evsel *evsel) -{ - evsel->core.attr.context_switch = 1; -} - -bool perf_can_sample_identifier(void) -{ - return perf_probe_api(perf_probe_sample_identifier); -} + struct perf_event_attr *attr = &evsel->core.attr; + struct evsel *leader = evsel->leader; + struct evsel *read_sampler; -static bool perf_can_comm_exec(void) -{ - return perf_probe_api(perf_probe_comm_exec); -} + if (!leader->sample_read) + return; -bool perf_can_record_switch_events(void) -{ - return perf_probe_api(perf_probe_context_switch); -} + read_sampler = evsel__read_sampler(evsel, evlist); -bool perf_can_record_cpu_wide(void) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .exclude_kernel = 1, - }; - struct perf_cpu_map *cpus; - int cpu, fd; - - cpus = perf_cpu_map__new(NULL); - if (!cpus) - return false; - cpu = cpus->map[0]; - perf_cpu_map__put(cpus); + if (evsel == read_sampler) + return; - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); - if (fd < 0) - return false; - close(fd); - - return true; -} - -/* - * Architectures are expected to know if AUX area sampling is supported by the - * hardware. Here we check for kernel support. - */ -bool perf_can_aux_sample(void) -{ - struct perf_event_attr attr = { - .size = sizeof(struct perf_event_attr), - .exclude_kernel = 1, - /* - * Non-zero value causes the kernel to calculate the effective - * attribute size up to that byte. - */ - .aux_sample_size = 1, - }; - int fd; - - fd = sys_perf_event_open(&attr, -1, 0, -1, 0); /* - * If the kernel attribute is big enough to contain aux_sample_size - * then we assume that it is supported. We are relying on the kernel to - * validate the attribute size before anything else that could be wrong. + * Disable sampling for all group members other than the leader in + * case the leader 'leads' the sampling, except when the leader is an + * AUX area event, in which case the 2nd event in the group is the one + * that 'leads' the sampling. */ - if (fd < 0 && errno == E2BIG) - return false; - if (fd >= 0) - close(fd); + attr->freq = 0; + attr->sample_freq = 0; + attr->sample_period = 0; + attr->write_backward = 0; - return true; + /* + * We don't get a sample for slave events, we make them when delivering + * the group leader sample. Set the slave event to follow the master + * sample_type to ease up reporting. + * An AUX area event also has sample_type requirements, so also include + * the sample type bits from the leader's sample_type to cover that + * case. + */ + attr->sample_type = read_sampler->core.attr.sample_type | + leader->core.attr.sample_type; } void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, @@ -188,11 +91,15 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, use_comm_exec = perf_can_comm_exec(); evlist__for_each_entry(evlist, evsel) { - perf_evsel__config(evsel, opts, callchain); + evsel__config(evsel, opts, callchain); if (evsel->tracking && use_comm_exec) evsel->core.attr.comm_exec = 1; } + /* Configure leader sampling here now that the sample type is known */ + evlist__for_each_entry(evlist, evsel) + evsel__config_leader_sampling(evsel, evlist); + if (opts->full_auxtrace) { /* * Need to be able to synthesize and parse selected events with @@ -215,7 +122,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts, if (sample_id) { evlist__for_each_entry(evlist, evsel) - perf_evsel__set_sample_id(evsel, use_sample_identifier); + evsel__set_sample_id(evsel, use_sample_identifier); } perf_evlist__set_id_pos(evlist); diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 24316458be20..923565c3b155 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -68,6 +68,7 @@ struct record_opts { int affinity; int mmap_flush; unsigned int comp_level; + unsigned int nr_threads_synthesize; }; extern const char * const *record_usage; diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h index d4356030b504..f55ca07f3ca1 100644 --- a/tools/perf/util/s390-cpumcf-kernel.h +++ b/tools/perf/util/s390-cpumcf-kernel.h @@ -11,6 +11,7 @@ #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ +#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */ struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ unsigned int def:16; /* 0-15 Data Entry Format */ diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 6785cd87aa4d..f8861998e5bd 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session) free(sf); } +static bool +s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused, + struct evsel *evsel) +{ + return evsel->core.attr.type == PERF_TYPE_RAW && + evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG; +} + static int s390_cpumsf_get_type(const char *cpuid) { int ret, family = 0; @@ -1071,7 +1079,8 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) itops->pwr_events || itops->errors || itops->dont_decode || itops->calls || itops->returns || itops->callchain || itops->thread_stack || - itops->last_branch; + itops->last_branch || itops->add_callchain || + itops->add_last_branch; if (!ison) return true; pr_err("Unsupported --itrace options specified\n"); @@ -1142,6 +1151,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event, sf->auxtrace.flush_events = s390_cpumsf_flush; sf->auxtrace.free_events = s390_cpumsf_free_events; sf->auxtrace.free = s390_cpumsf_free; + sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace; session->auxtrace = &sf->auxtrace; if (dump_trace) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 2c372cf5495e..739516fdf6e3 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -741,7 +741,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, if (!dict_sample) Py_FatalError("couldn't create Python dictionary"); - pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel))); pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr))); pydict_set_item_string_decref(dict_sample, "pid", @@ -968,7 +968,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel) t = tuple_new(2); tuple_set_u64(t, 0, evsel->db_id); - tuple_set_string(t, 1, perf_evsel__name(evsel)); + tuple_set_string(t, 1, evsel__name(evsel)); call_object(tables->evsel_handler, t, "evsel_table"); @@ -1349,7 +1349,7 @@ static void get_handler_name(char *str, size_t size, { char *p = str; - scnprintf(str, size, "stat__%s", perf_evsel__name(evsel)); + scnprintf(str, size, "stat__%s", evsel__name(evsel)); while ((p = strchr(p, ':'))) { *p = '_'; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0b0bfe5bef17..c11d89e0ee55 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1059,7 +1059,7 @@ static void callchain__printf(struct evsel *evsel, unsigned int i; struct ip_callchain *callchain = sample->callchain; - if (perf_evsel__has_branch_callstack(evsel)) + if (evsel__has_branch_callstack(evsel)) callchain__lbr_callstack_printf(sample); printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); @@ -1243,8 +1243,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (evsel__has_callchain(evsel)) callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) - branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel)); + if (evsel__has_br_stack(evsel)) + branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) regs_user__printf(sample); @@ -1280,8 +1280,7 @@ static void dump_read(struct evsel *evsel, union perf_event *event) return; printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid, - perf_evsel__name(evsel), - event->read.value); + evsel__name(evsel), event->read.value); if (!evsel) return; diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c new file mode 100644 index 000000000000..1580a3cbec2d --- /dev/null +++ b/tools/perf/util/sideband_evlist.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "util/debug.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/mmap.h" +#include "util/perf_api_probe.h" +#include <perf/mmap.h> +#include <linux/perf_event.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <stdbool.h> + +int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr, + evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + if (!attr->sample_id_all) { + pr_warning("enabling sample_id_all for all side band events\n"); + attr->sample_id_all = 1; + } + + evsel = perf_evsel__new_idx(attr, evlist->core.nr_entries); + if (!evsel) + return -1; + + evsel->side_band.cb = cb; + evsel->side_band.data = data; + evlist__add(evlist, evsel); + return 0; +} + +static void *perf_evlist__poll_thread(void *arg) +{ + struct evlist *evlist = arg; + bool draining = false; + int i, done = 0; + /* + * In order to read symbols from other namespaces perf to needs to call + * setns(2). This isn't permitted if the struct_fs has multiple users. + * unshare(2) the fs so that we may continue to setns into namespaces + * that we're observing when, for instance, reading the build-ids at + * the end of a 'perf record' session. + */ + unshare(CLONE_FS); + + while (!done) { + bool got_data = false; + + if (evlist->thread.done) + draining = true; + + if (!draining) + evlist__poll(evlist, 1000); + + for (i = 0; i < evlist->core.nr_mmaps; i++) { + struct mmap *map = &evlist->mmap[i]; + union perf_event *event; + + if (perf_mmap__read_init(&map->core)) + continue; + while ((event = perf_mmap__read_event(&map->core)) != NULL) { + struct evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (evsel && evsel->side_band.cb) + evsel->side_band.cb(event, evsel->side_band.data); + else + pr_warning("cannot locate proper evsel for the side band event\n"); + + perf_mmap__consume(&map->core); + got_data = true; + } + perf_mmap__read_done(&map->core); + } + + if (draining && !got_data) + break; + } + return NULL; +} + +void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_id_all = 1; + evsel->core.attr.watermark = 1; + evsel->core.attr.wakeup_watermark = 1; + evsel->side_band.cb = cb; + evsel->side_band.data = data; + } +} + +int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target) +{ + struct evsel *counter; + + if (!evlist) + return 0; + + if (perf_evlist__create_maps(evlist, target)) + goto out_delete_evlist; + + if (evlist->core.nr_entries > 1) { + bool can_sample_identifier = perf_can_sample_identifier(); + + evlist__for_each_entry(evlist, counter) + evsel__set_sample_id(counter, can_sample_identifier); + + perf_evlist__set_id_pos(evlist); + } + + evlist__for_each_entry(evlist, counter) { + if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0) + goto out_delete_evlist; + } + + if (evlist__mmap(evlist, UINT_MAX)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (evsel__enable(counter)) + goto out_delete_evlist; + } + + evlist->thread.done = 0; + if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) + goto out_delete_evlist; + + return 0; + +out_delete_evlist: + evlist__delete(evlist); + evlist = NULL; + return -1; +} + +void perf_evlist__stop_sb_thread(struct evlist *evlist) +{ + if (!evlist) + return; + evlist->thread.done = 1; + pthread_join(evlist->thread.th, NULL); + evlist__delete(evlist); +} diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 3b791ef2cd50..20bacd5972ad 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,6 +15,9 @@ int smt_on(void) if (cached) return cached_result; + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + goto done; + ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -24,13 +27,13 @@ int smt_on(void) snprintf(fn, sizeof fn, "devices/system/cpu/cpu%d/topology/core_cpus", cpu); - if (access(fn, F_OK) == -1) { + if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + if (sysfs__read_str(fn, &str, &strlen) < 0) + continue; } - if (sysfs__read_str(fn, &str, &strlen) < 0) - continue; /* Entry is hex, but does not have 0x, so need custom parser */ siblings = strtoull(str, NULL, 16); free(str); @@ -42,6 +45,7 @@ int smt_on(void) } if (!cached) { cached_result = 0; +done: cached = true; } return cached_result; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index f14cc728c358..c1f8879f92cc 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -237,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip) return (int64_t)(right_ip - left_ip); } -static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) +int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) { if (!sym_l || !sym_r) return cmp_null(sym_l, sym_r); @@ -300,8 +300,14 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms, if (verbose > 0) { char o = map ? dso__symtab_origin(map->dso) : '!'; + u64 rip = ip; + + if (map && map->dso && map->dso->kernel + && map->dso->adjust_symbols) + rip = map->unmap_ip(map, ip); + ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4 + 2, ip, o); + BITS_PER_LONG / 4 + 2, rip, o); } ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); @@ -2354,7 +2360,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name) evsel = evlist__first(evlist); while (--nr > 0) - evsel = perf_evsel__next(evsel); + evsel = evsel__next(evsel); return evsel; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index cfa6ac6f7d06..66d39c4cfe2b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -311,5 +311,7 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); +int64_t +_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r); char *hist_entry__srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 9e757d18d713..3c6976f7574c 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -237,8 +237,6 @@ static bool valid_only_metric(const char *unit) if (!unit) return false; if (strstr(unit, "/sec") || - strstr(unit, "hz") || - strstr(unit, "Hz") || strstr(unit, "CPUs utilized")) return false; return true; @@ -248,7 +246,7 @@ static const char *fixunit(char *buf, struct evsel *evsel, const char *unit) { if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + snprintf(buf, 1024, "%s %s", evsel__name(evsel), unit); return buf; } @@ -335,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config, if (config->aggr_mode == AGGR_GLOBAL) return 0; - for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + for (i = 0; i < evsel__nr_cpus(evsel); i++) { int cpu2 = evsel__cpus(evsel)->map[i]; if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id) @@ -369,7 +367,7 @@ static void abs_printout(struct perf_stat_config *config, config->csv_output ? 0 : config->unit_width, evsel->unit, config->csv_sep); - fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); + fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel)); print_cgroup(config, evsel); } @@ -463,8 +461,7 @@ static void printout(struct perf_stat_config *config, int id, int nr, counter->unit, config->csv_sep); fprintf(config->output, "%*s", - config->csv_output ? 0 : -25, - perf_evsel__name(counter)); + config->csv_output ? 0 : -25, evsel__name(counter)); print_cgroup(config, counter); @@ -510,7 +507,7 @@ static void aggr_update_shadow(struct perf_stat_config *config, id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { val = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); if (s2 != id) continue; @@ -561,11 +558,11 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + if (strcmp(evsel__name(alias), evsel__name(counter)) || alias->scale != counter->scale || alias->cgrp != counter->cgrp || strcmp(alias->unit, counter->unit) || - perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) || + evsel__is_clock(alias) != evsel__is_clock(counter) || !strcmp(alias->pmu_name, counter->pmu_name)) break; alias->merged_stat = true; @@ -601,7 +598,7 @@ static void aggr_cb(struct perf_stat_config *config, struct aggr_data *ad = data; int cpu, s2; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct perf_counts_values *counts; s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); @@ -849,7 +846,7 @@ static void print_counter(struct perf_stat_config *config, double uval; int cpu; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { struct aggr_data ad = { .cpu = cpu }; if (!collect_data(config, counter, counter_cb, &ad)) @@ -1150,7 +1147,7 @@ static void print_percore_thread(struct perf_stat_config *config, int s, s2, id; bool first = true; - for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { + for (int i = 0; i < evsel__nr_cpus(counter); i++) { s2 = config->aggr_get_id(config, evsel__cpus(counter), i); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 03ecb8cd0eec..129b8c5f2538 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -216,9 +216,9 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; - if (perf_evsel__is_clock(counter)) + if (evsel__is_clock(counter)) update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns); - else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) + else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); @@ -241,25 +241,25 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) + else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) + else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) + else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); - else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) + else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) + else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); @@ -336,7 +336,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) metric_events = counter->metric_events; if (!metric_events) { if (expr__find_other(counter->metric_expr, counter->name, - &metric_names, &num_metric_names) < 0) + &metric_names, &num_metric_names, 1) < 0) continue; metric_events = calloc(sizeof(struct evsel *), @@ -723,13 +723,14 @@ static void generic_metric(struct perf_stat_config *config, char *name, const char *metric_name, const char *metric_unit, + int runtime, double avg, int cpu, struct perf_stat_output_ctx *out, struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; - struct parse_ctx pctx; + struct expr_parse_ctx pctx; double ratio, scale; int i; void *ctxp = out->ctx; @@ -777,7 +778,7 @@ static void generic_metric(struct perf_stat_config *config, } if (!metric_events[i]) { - if (expr__parse(&ratio, &pctx, metric_expr) == 0) { + if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { char *unit; char metric_bf[64]; @@ -786,9 +787,13 @@ static void generic_metric(struct perf_stat_config *config, &unit, &scale) >= 0) { ratio *= scale; } - - scnprintf(metric_bf, sizeof(metric_bf), + if (strstr(metric_expr, "?")) + scnprintf(metric_bf, sizeof(metric_bf), + "%s %s_%d", unit, metric_name, runtime); + else + scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); + print_metric(config, ctxp, NULL, "%8.1f", metric_bf, ratio); } else { @@ -828,7 +833,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct metric_event *me; int num = 1; - if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { + if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) { @@ -853,7 +858,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, "stalled cycles per insn", ratio); } - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) print_branch_misses(config, cpu, evsel, avg, out, st); else @@ -908,7 +913,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_ll_cache_misses(config, cpu, evsel, avg, out, st); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { + } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) @@ -919,11 +924,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { + } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { + } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { + } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { @@ -974,7 +979,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio = total / avg; print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__is_clock(evsel)) { + } else if (evsel__is_clock(evsel)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", avg / (ratio * evsel->scale)); @@ -1022,7 +1027,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, NULL, avg, cpu, out, st); + evsel->metric_name, NULL, 1, avg, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; @@ -1051,7 +1056,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - mexp->metric_unit, avg, cpu, out, st); + mexp->metric_unit, mexp->runtime, avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5f26137b8d60..774468341851 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -108,7 +108,7 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { - if (!strcmp(perf_evsel__name(evsel), id_str[i])) { + if (!strcmp(evsel__name(evsel), id_str[i])) { ps->id = i; break; } @@ -173,7 +173,7 @@ static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel) static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { - int ncpus = perf_evsel__nr_cpus(evsel); + int ncpus = evsel__nr_cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); if (perf_evsel__alloc_stat_priv(evsel) < 0 || @@ -302,7 +302,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_NODE: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, thread, count); + evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, @@ -334,7 +334,7 @@ static int process_counter_maps(struct perf_stat_config *config, struct evsel *counter) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = perf_evsel__nr_cpus(counter); + int ncpus = evsel__nr_cpus(counter); int cpu, thread; if (counter->core.system_wide) @@ -368,8 +368,10 @@ int perf_stat_process_counter(struct perf_stat_config *config, * interval mode, otherwise overall avg running * averages will be shown for each interval. */ - if (config->interval) - init_stats(ps->res_stats); + if (config->interval) { + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + } if (counter->per_pkg) zero_per_pkg(counter); @@ -382,7 +384,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, -1, aggr); + evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -390,7 +392,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (verbose > 0) { fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", - perf_evsel__name(counter), count[0], count[1], count[2]); + evsel__name(counter), count[0], count[1], count[2]); } /* @@ -507,7 +509,7 @@ int create_perf_stat_counter(struct evsel *evsel, * either manually by us or by kernel via enable_on_exec * set later. */ - if (perf_evsel__is_group_leader(evsel)) { + if (evsel__is_group_leader(evsel)) { attr->disabled = 1; /* @@ -519,7 +521,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); + return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); - return perf_evsel__open_per_thread(evsel, evsel->core.threads); + return evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 26bc6a0096ce..381da6b39f89 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -566,6 +566,20 @@ void dso__sort_by_name(struct dso *dso) return symbols__sort_by_name(&dso->symbol_names, &dso->symbols); } +/* + * While we find nice hex chars, build a long_val. + * Return number of chars processed. + */ +static int hex2u64(const char *ptr, u64 *long_val) +{ + char *p; + + *long_val = strtoull(ptr, &p, 16); + + return p - ptr; +} + + int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start, u64 size)) @@ -1544,6 +1558,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, return true; case DSO_BINARY_TYPE__BPF_PROG_INFO: + case DSO_BINARY_TYPE__BPF_IMAGE: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a661b122d9d8..89b390623b63 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -37,6 +37,7 @@ #include <string.h> #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <api/fs/fs.h> +#include <api/io.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -71,7 +72,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool, static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, pid_t *tgid, pid_t *ppid) { - char filename[PATH_MAX]; char bf[4096]; int fd; size_t size = 0; @@ -81,11 +81,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, *tgid = -1; *ppid = -1; - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + snprintf(bf, sizeof(bf), "/proc/%d/status", pid); - fd = open(filename, O_RDONLY); + fd = open(bf, O_RDONLY); if (fd < 0) { - pr_debug("couldn't open %s\n", filename); + pr_debug("couldn't open %s\n", bf); return -1; } @@ -274,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, return 0; } +static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end, + u32 *prot, u32 *flags, __u64 *offset, + u32 *maj, u32 *min, + __u64 *inode, + ssize_t pathname_size, char *pathname) +{ + __u64 temp; + int ch; + char *start_pathname = pathname; + + if (io__get_hex(io, start) != '-') + return false; + if (io__get_hex(io, end) != ' ') + return false; + + /* map protection and flags bits */ + *prot = 0; + ch = io__get_char(io); + if (ch == 'r') + *prot |= PROT_READ; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 'w') + *prot |= PROT_WRITE; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 'x') + *prot |= PROT_EXEC; + else if (ch != '-') + return false; + ch = io__get_char(io); + if (ch == 's') + *flags = MAP_SHARED; + else if (ch == 'p') + *flags = MAP_PRIVATE; + else + return false; + if (io__get_char(io) != ' ') + return false; + + if (io__get_hex(io, offset) != ' ') + return false; + + if (io__get_hex(io, &temp) != ':') + return false; + *maj = temp; + if (io__get_hex(io, &temp) != ' ') + return false; + *min = temp; + + ch = io__get_dec(io, inode); + if (ch != ' ') { + *pathname = '\0'; + return ch == '\n'; + } + do { + ch = io__get_char(io); + } while (ch == ' '); + while (true) { + if (ch < 0) + return false; + if (ch == '\0' || ch == '\n' || + (pathname + 1 - start_pathname) >= pathname_size) { + *pathname = '\0'; + return true; + } + *pathname++ = ch; + ch = io__get_char(io); + } +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -281,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, struct machine *machine, bool mmap_data) { - char filename[PATH_MAX]; - FILE *fp; unsigned long long t; + char bf[BUFSIZ]; + struct io io; bool truncation = false; unsigned long long timeout = proc_map_timeout * 1000000ULL; int rc = 0; @@ -293,59 +366,52 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (machine__is_default_guest(machine)) return 0; - snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps", - machine->root_dir, pid, pid); + snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps", + machine->root_dir, pid, pid); - fp = fopen(filename, "r"); - if (fp == NULL) { + io.fd = open(bf, O_RDONLY, 0); + if (io.fd < 0) { /* * We raced with a task exiting - just return: */ - pr_debug("couldn't open %s\n", filename); + pr_debug("couldn't open %s\n", bf); return -1; } + io__init(&io, io.fd, bf, sizeof(bf)); event->header.type = PERF_RECORD_MMAP2; t = rdclock(); - while (1) { - char bf[BUFSIZ]; - char prot[5]; - char execname[PATH_MAX]; - char anonstr[] = "//anon"; - unsigned int ino; + while (!io.eof) { + static const char anonstr[] = "//anon"; size_t size; - ssize_t n; - if (fgets(bf, sizeof(bf), fp) == NULL) - break; + /* ensure null termination since stack will be reused. */ + event->mmap2.filename[0] = '\0'; + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + if (!read_proc_maps_line(&io, + &event->mmap2.start, + &event->mmap2.len, + &event->mmap2.prot, + &event->mmap2.flags, + &event->mmap2.pgoff, + &event->mmap2.maj, + &event->mmap2.min, + &event->mmap2.ino, + sizeof(event->mmap2.filename), + event->mmap2.filename)) + continue; if ((rdclock() - t) > timeout) { - pr_warning("Reading %s time out. " + pr_warning("Reading %s/proc/%d/task/%d/maps time out. " "You may want to increase " "the time limit by --proc-map-timeout\n", - filename); + machine->root_dir, pid, pid); truncation = true; goto out; } - /* ensure null termination since stack will be reused. */ - strcpy(execname, ""); - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - /* - * Anon maps don't have the execname. - */ - if (n < 7) - continue; - - event->mmap2.ino = (u64)ino; event->mmap2.ino_generation = 0; /* @@ -356,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, else event->header.misc = PERF_RECORD_MISC_GUEST_USER; - /* map protection and flags bits */ - event->mmap2.prot = 0; - event->mmap2.flags = 0; - if (prot[0] == 'r') - event->mmap2.prot |= PROT_READ; - if (prot[1] == 'w') - event->mmap2.prot |= PROT_WRITE; - if (prot[2] == 'x') - event->mmap2.prot |= PROT_EXEC; - - if (prot[3] == 's') - event->mmap2.flags |= MAP_SHARED; - else - event->mmap2.flags |= MAP_PRIVATE; - - if (prot[2] != 'x') { - if (!mmap_data || prot[0] != 'r') + if ((event->mmap2.prot & PROT_EXEC) == 0) { + if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0) continue; event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; @@ -382,17 +433,17 @@ out: if (truncation) event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; - if (!strcmp(execname, "")) - strcpy(execname, anonstr); + if (!strcmp(event->mmap2.filename, "")) + strcpy(event->mmap2.filename, anonstr); if (hugetlbfs_mnt_len && - !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) { - strcpy(execname, anonstr); + !strncmp(event->mmap2.filename, hugetlbfs_mnt, + hugetlbfs_mnt_len)) { + strcpy(event->mmap2.filename, anonstr); event->mmap2.flags |= MAP_HUGETLB; } - size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); + size = strlen(event->mmap2.filename) + 1; size = PERF_ALIGN(size, sizeof(u64)); event->mmap2.len -= event->mmap.start; event->mmap2.header.size = (sizeof(event->mmap2) - @@ -411,7 +462,7 @@ out: break; } - fclose(fp); + close(io.fd); return rc; } @@ -1130,7 +1181,7 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max); default: break; - }; + } } static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map) diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 0885967d5bc3..1b992bbba4e8 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -80,6 +80,10 @@ struct thread_stack_entry { * @comm: current comm * @arr_sz: size of array if this is the first element of an array * @rstate: used to detect retpolines + * @br_stack_rb: branch stack (ring buffer) + * @br_stack_sz: maximum branch stack size + * @br_stack_pos: current position in @br_stack_rb + * @mispred_all: mark all branches as mispredicted */ struct thread_stack { struct thread_stack_entry *stack; @@ -95,6 +99,10 @@ struct thread_stack { struct comm *comm; unsigned int arr_sz; enum retpoline_state_t rstate; + struct branch_stack *br_stack_rb; + unsigned int br_stack_sz; + unsigned int br_stack_pos; + bool mispred_all; }; /* @@ -126,13 +134,26 @@ static int thread_stack__grow(struct thread_stack *ts) } static int thread_stack__init(struct thread_stack *ts, struct thread *thread, - struct call_return_processor *crp) + struct call_return_processor *crp, + bool callstack, unsigned int br_stack_sz) { int err; - err = thread_stack__grow(ts); - if (err) - return err; + if (callstack) { + err = thread_stack__grow(ts); + if (err) + return err; + } + + if (br_stack_sz) { + size_t sz = sizeof(struct branch_stack); + + sz += br_stack_sz * sizeof(struct branch_entry); + ts->br_stack_rb = zalloc(sz); + if (!ts->br_stack_rb) + return -ENOMEM; + ts->br_stack_sz = br_stack_sz; + } if (thread->maps && thread->maps->machine) { struct machine *machine = thread->maps->machine; @@ -150,7 +171,9 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, } static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, - struct call_return_processor *crp) + struct call_return_processor *crp, + bool callstack, + unsigned int br_stack_sz) { struct thread_stack *ts = thread->ts, *new_ts; unsigned int old_sz = ts ? ts->arr_sz : 0; @@ -176,7 +199,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu, ts += cpu; if (!ts->stack && - thread_stack__init(ts, thread, crp)) + thread_stack__init(ts, thread, crp, callstack, br_stack_sz)) return NULL; return ts; @@ -319,6 +342,9 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) if (!crp) { ts->cnt = 0; + ts->br_stack_pos = 0; + if (ts->br_stack_rb) + ts->br_stack_rb->nr = 0; return 0; } @@ -353,8 +379,33 @@ int thread_stack__flush(struct thread *thread) return err; } +static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags, + u64 from_ip, u64 to_ip) +{ + struct branch_stack *bs = ts->br_stack_rb; + struct branch_entry *be; + + if (!ts->br_stack_pos) + ts->br_stack_pos = ts->br_stack_sz; + + ts->br_stack_pos -= 1; + + be = &bs->entries[ts->br_stack_pos]; + be->from = from_ip; + be->to = to_ip; + be->flags.value = 0; + be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT); + be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX); + /* No support for mispredict */ + be->flags.mispred = ts->mispred_all; + + if (bs->nr < ts->br_stack_sz) + bs->nr += 1; +} + int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, - u64 to_ip, u16 insn_len, u64 trace_nr) + u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, + unsigned int br_stack_sz, bool mispred_all) { struct thread_stack *ts = thread__stack(thread, cpu); @@ -362,12 +413,13 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, return -EINVAL; if (!ts) { - ts = thread_stack__new(thread, cpu, NULL); + ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz); if (!ts) { pr_warning("Out of memory: no thread stack\n"); return -ENOMEM; } ts->trace_nr = trace_nr; + ts->mispred_all = mispred_all; } /* @@ -381,8 +433,14 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, ts->trace_nr = trace_nr; } - /* Stop here if thread_stack__process() is in use */ - if (ts->crp) + if (br_stack_sz) + thread_stack__update_br_stack(ts, flags, from_ip, to_ip); + + /* + * Stop here if thread_stack__process() is in use, or not recording call + * stack. + */ + if (ts->crp || !callstack) return 0; if (flags & PERF_IP_FLAG_CALL) { @@ -430,6 +488,7 @@ static void __thread_stack__free(struct thread *thread, struct thread_stack *ts) { __thread_stack__flush(thread, ts); zfree(&ts->stack); + zfree(&ts->br_stack_rb); } static void thread_stack__reset(struct thread *thread, struct thread_stack *ts) @@ -497,6 +556,199 @@ void thread_stack__sample(struct thread *thread, int cpu, chain->nr = i; } +/* + * Hardware sample records, created some time after the event occurred, need to + * have subsequent addresses removed from the call chain. + */ +void thread_stack__sample_late(struct thread *thread, int cpu, + struct ip_callchain *chain, size_t sz, + u64 sample_ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + u64 sample_context = callchain_context(sample_ip, kernel_start); + u64 last_context, context, ip; + size_t nr = 0, j; + + if (sz < 2) { + chain->nr = 0; + return; + } + + if (!ts) + goto out; + + /* + * When tracing kernel space, kernel addresses occur at the top of the + * call chain after the event occurred but before tracing stopped. + * Skip them. + */ + for (j = 1; j <= ts->cnt; j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context == PERF_CONTEXT_USER || + (context == sample_context && ip == sample_ip)) + break; + } + + last_context = sample_ip; /* Use sample_ip as an invalid context */ + + for (; nr < sz && j <= ts->cnt; nr++, j++) { + ip = ts->stack[ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (nr >= sz - 1) + break; + chain->ips[nr++] = context; + last_context = context; + } + chain->ips[nr] = ip; + } +out: + if (nr) { + chain->nr = nr; + } else { + chain->ips[0] = sample_context; + chain->ips[1] = sample_ip; + chain->nr = 2; + } +} + +void thread_stack__br_sample(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + const size_t bsz = sizeof(struct branch_entry); + struct branch_stack *src; + struct branch_entry *be; + unsigned int nr; + + dst->nr = 0; + + if (!ts) + return; + + src = ts->br_stack_rb; + if (!src->nr) + return; + + dst->nr = min((unsigned int)src->nr, sz); + + be = &dst->entries[0]; + nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr); + memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr); + + if (src->nr >= ts->br_stack_sz) { + sz -= nr; + be = &dst->entries[nr]; + nr = min(ts->br_stack_pos, sz); + memcpy(be, &src->entries[0], bsz * ts->br_stack_pos); + } +} + +/* Start of user space branch entries */ +static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start) +{ + if (!*start) + *start = be->to && be->to < kernel_start; + + return *start; +} + +/* + * Start of branch entries after the ip fell in between 2 branches, or user + * space branch entries. + */ +static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start, + bool *start, struct branch_entry *nb) +{ + if (!*start) { + *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || + be->from < kernel_start || + (be->to && be->to < kernel_start); + } + + return *start; +} + +/* + * Hardware sample records, created some time after the event occurred, need to + * have subsequent addresses removed from the branch stack. + */ +void thread_stack__br_sample_late(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz, + u64 ip, u64 kernel_start) +{ + struct thread_stack *ts = thread__stack(thread, cpu); + struct branch_entry *d, *s, *spos, *ssz; + struct branch_stack *src; + unsigned int nr = 0; + bool start = false; + + dst->nr = 0; + + if (!ts) + return; + + src = ts->br_stack_rb; + if (!src->nr) + return; + + spos = &src->entries[ts->br_stack_pos]; + ssz = &src->entries[ts->br_stack_sz]; + + d = &dst->entries[0]; + s = spos; + + if (ip < kernel_start) { + /* + * User space sample: start copying branch entries when the + * branch is in user space. + */ + for (s = spos; s < ssz && nr < sz; s++) { + if (us_start(s, kernel_start, &start)) { + *d++ = *s; + nr += 1; + } + } + + if (src->nr >= ts->br_stack_sz) { + for (s = &src->entries[0]; s < spos && nr < sz; s++) { + if (us_start(s, kernel_start, &start)) { + *d++ = *s; + nr += 1; + } + } + } + } else { + struct branch_entry *nb = NULL; + + /* + * Kernel space sample: start copying branch entries when the ip + * falls in between 2 branches (or the branch is in user space + * because then the start must have been missed). + */ + for (s = spos; s < ssz && nr < sz; s++) { + if (ks_start(s, ip, kernel_start, &start, nb)) { + *d++ = *s; + nr += 1; + } + nb = s; + } + + if (src->nr >= ts->br_stack_sz) { + for (s = &src->entries[0]; s < spos && nr < sz; s++) { + if (ks_start(s, ip, kernel_start, &start, nb)) { + *d++ = *s; + nr += 1; + } + nb = s; + } + } + } + + dst->nr = nr; +} + struct call_return_processor * call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), void *data) @@ -864,7 +1116,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, } if (!ts) { - ts = thread_stack__new(thread, sample->cpu, crp); + ts = thread_stack__new(thread, sample->cpu, crp, true, 0); if (!ts) return -ENOMEM; ts->comm = comm; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index e1ec5a58f1b2..3bc47a42af8e 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -81,10 +81,19 @@ struct call_return_processor { }; int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip, - u64 to_ip, u16 insn_len, u64 trace_nr); + u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack, + unsigned int br_stack_sz, bool mispred_all); void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr); void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain, size_t sz, u64 ip, u64 kernel_start); +void thread_stack__sample_late(struct thread *thread, int cpu, + struct ip_callchain *chain, size_t sz, u64 ip, + u64 kernel_start); +void thread_stack__br_sample(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz); +void thread_stack__br_sample_late(struct thread *thread, int cpu, + struct branch_stack *dst, unsigned int sz, + u64 sample_ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread, int cpu); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 28b719388028..665e5c0618ed 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->tid = tid; thread->ppid = -1; thread->cpu = -1; + thread->lbr_stitch_enable = false; INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); init_rwsem(&thread->namespaces_lock); @@ -110,6 +111,7 @@ void thread__delete(struct thread *thread) exit_rwsem(&thread->namespaces_lock); exit_rwsem(&thread->comm_lock); + thread__free_stitch_list(thread); free(thread); } @@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine, return dso__data_read_offset(al.map->dso, machine, offset, buf, len); } + +void thread__free_stitch_list(struct thread *thread) +{ + struct lbr_stitch *lbr_stitch = thread->lbr_stitch; + struct stitch_list *pos, *tmp; + + if (!lbr_stitch) + return; + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) { + list_del_init(&pos->node); + free(pos); + } + + list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) { + list_del_init(&pos->node); + free(pos); + } + + zfree(&lbr_stitch->prev_lbr_cursor); + zfree(&thread->lbr_stitch); +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 20b96b5d1f15..b066fb30d203 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -13,6 +13,8 @@ #include <strlist.h> #include <intlist.h> #include "rwsem.h" +#include "event.h" +#include "callchain.h" struct addr_location; struct map; @@ -20,6 +22,13 @@ struct perf_record_namespaces; struct thread_stack; struct unwind_libunwind_ops; +struct lbr_stitch { + struct list_head lists; + struct list_head free_lists; + struct perf_sample prev_sample; + struct callchain_cursor_node *prev_lbr_cursor; +}; + struct thread { union { struct rb_node rb_node; @@ -46,6 +55,10 @@ struct thread { struct srccode_state srccode_state; bool filter; int filter_entry_depth; + + /* LBR call stack stitch */ + bool lbr_stitch_enable; + struct lbr_stitch *lbr_stitch; }; struct machine; @@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread) return false; } +void thread__free_stitch_list(struct thread *thread); + #endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 3dce2de9d005..27945eeb0cb5 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -77,7 +77,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) opts->freq ? "Hz" : ""); } - ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel)); + ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel)); ret += SNPRINTF(bf + ret, size - ret, "], "); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f117d4f4821e..ff8391208ecd 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -18,7 +18,7 @@ struct perf_session; struct perf_top { struct perf_tool tool; - struct evlist *evlist; + struct evlist *evlist, *sb_evlist; struct record_opts record_opts; struct annotation_options annotation_opts; struct evswitch evswitch; @@ -36,6 +36,7 @@ struct perf_top { bool use_tui, use_stdio; bool vmlinux_warned; bool dump_symtab; + bool stitch_lbr; struct hist_entry *sym_filter_entry; struct evsel *sym_evsel; struct perf_session *session; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8593d3c200c6..f507dff713c9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -75,7 +75,7 @@ static void skip(int size) r = size > BUFSIZ ? BUFSIZ : size; do_read(buf, r); size -= r; - }; + } } static unsigned int read4(struct tep_handle *pevent) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d707c9624dd9..37a9492edb3e 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -290,6 +290,7 @@ int perf_event_paranoid(void) bool perf_event_paranoid_check(int max_level) { return perf_cap__capable(CAP_SYS_ADMIN) || + perf_cap__capable(CAP_PERFMON) || perf_event_paranoid() <= max_level; } diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index d3755ea70d4d..0ba61a2c4d81 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -62,7 +62,7 @@ int cmd_info(int argc, char **argv) default: print_wrong_arg_exit(); } - }; + } if (!params.params) params.params = 0x7; diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index 3cca6f715dd9..052044d7e012 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -72,7 +72,7 @@ int cmd_set(int argc, char **argv) default: print_wrong_arg_exit(); } - }; + } if (!params.params) print_wrong_arg_exit(); diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 20f46348271b..5edd35bd9ee9 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -117,7 +117,7 @@ static int amd_fam14h_get_pci_info(struct cstate *state, break; default: return -1; - }; + } return 0; } diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index a65f7d011513..8b42c2f0a5b0 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -53,7 +53,7 @@ static int cpuidle_start(void) dprint("CPU %d - State: %d - Val: %llu\n", cpu, state, previous_count[cpu][state]); } - }; + } return 0; } @@ -72,7 +72,7 @@ static int cpuidle_stop(void) dprint("CPU %d - State: %d - Val: %llu\n", cpu, state, previous_count[cpu][state]); } - }; + } return 0; } @@ -172,7 +172,7 @@ static struct cpuidle_monitor *cpuidle_register(void) cpuidle_cstates[num].id = num; cpuidle_cstates[num].get_count_percent = cpuidle_get_count_percent; - }; + } /* Free this at program termination */ previous_count = malloc(sizeof(long long *) * cpu_count); diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index 97ad3233a521..55e55b6b42f9 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -79,7 +79,7 @@ static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val, break; default: return -1; - }; + } if (read_msr(cpu, msr, val)) return -1; return 0; diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c index 114271165182..16eaf006f61f 100644 --- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -91,7 +91,7 @@ static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val, break; default: return -1; - }; + } if (read_msr(cpu, msr, val)) return -1; diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index df8b223cc096..811d63ab17a7 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -77,7 +77,7 @@ static int snb_get_count(enum intel_snb_id id, unsigned long long *val, break; default: return -1; - }; + } if (read_msr(cpu, msr, val)) return -1; return 0; diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile index 845541544570..b5310832c19c 100644 --- a/tools/power/pm-graph/Makefile +++ b/tools/power/pm-graph/Makefile @@ -41,6 +41,10 @@ uninstall : if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \ rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \ fi; + rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/* + if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \ + rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \ + fi; rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/* if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \ rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \ diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README index 96259f6e5715..afe6beb40ad9 100644 --- a/tools/power/pm-graph/README +++ b/tools/power/pm-graph/README @@ -1,7 +1,12 @@ - p m - g r a p h + _ + _ __ _ __ ___ __ _ _ __ __ _ _ __ | |__ + | '_ \| '_ ` _ \ _____ / _` | '__/ _` | '_ \| '_ \ + | |_) | | | | | |_____| (_| | | | (_| | |_) | | | | + | .__/|_| |_| |_| \__, |_| \__,_| .__/|_| |_| + |_| |___/ |_| pm-graph: suspend/resume/boot timing analysis tools - Version: 5.5 + Version: 5.6 Author: Todd Brandt <todd.e.brandt@intel.com> Home Page: https://01.org/pm-graph @@ -18,10 +23,6 @@ - upstream version in git: https://github.com/intel/pm-graph/ - Requirements: - - runs with python2 or python3, choice is made by /usr/bin/python link - - python2 now requires python-configparser be installed - Table of Contents - Overview - Setup @@ -29,6 +30,8 @@ - Basic Usage - Dev Mode Usage - Proc Mode Usage + - Endurance Testing + - Usage Examples - Configuration Files - Usage Examples - Config File Options @@ -54,15 +57,18 @@ | SETUP | ------------------------------------------------------------------ - These packages are required to execute the scripts + Package Requirements + - runs with python2 or python3, choice is made by /usr/bin/python link - python - - python-requests + - python-configparser (for python2 sleepgraph) + - python-requests (for googlesheet.py) + - linux-tools-common (for turbostat usage in sleepgraph) Ubuntu: - sudo apt-get install python python-requests + sudo apt-get install python python-configparser python-requests linux-tools-common Fedora: - sudo dnf install python python-requests + sudo dnf install python python-configparser python-requests linux-tools-common The tools can most easily be installed via git clone and make install @@ -190,6 +196,104 @@ _______________ %> sudo ./sleepgraph.py -config config/suspend-proc.cfg +------------------------------------------------------------------ +| ENDURANCE TESTING | +------------------------------------------------------------------ + + The best way to gauge the health of a system is to run a series of + suspend/resumes over an extended period and analyze the behavior. This can be + accomplished with sleepgraph's -multi argument. You specify two numbers: the + number of tests to run OR the duration in days, hours, or minutes, and the + delay in seconds between them. For instance, -multi 20 5: execute 20 tests with + a 5 second delay between each, or -multi 24h 0: execute tests over a 24 hour + period with no delay between tests. You can include any other options you like + to generate the data you want. It's most useful to collect dev mode timelines + as the kprobes don't alter the performance much and you get more insight. + + On completion, the output folder contains a series of folders for the + individual test data and a set of summary pages in the root. The summary.html + file is a tabular list of the tests with relevant info and links. The + summary-issue.html and summary-devices.html files include data taken from + all tests on kernel issues and device performance. The folder looks like this: + + suspend-xN-{date}-{time}: + summary.html + summary-issues.html + summary-devices.html + suspend-{date}-{time} (1) + suspend-{date}-{time} (2) + ... + + These are the relevant arguments to use for testing: + + -m mode + Mode to initiate for suspend e.g. mem, freeze, standby (default: mem). + + -rtcwake t + Use rtcwake to autoresume after t seconds (default: 15). + + -gzip (optional) + Gzip the trace and dmesg logs to save space. The tool can also read in + gzipped logs for processing. This reduces the multitest folder size. + + -dev (optional) + Add kernel source calls and threads to the timeline (default: disabled). + + -multi n d + Execute n consecutive tests at d seconds intervals. The outputs will be + created in a new subdirectory: suspend-xN-{date}-{time}. When the multitest + run is done, the -summary command is called automatically to create summary + html files for all the data (unless you use -skiphtml). -skiphtml will + speed up the testing by not creating timelines or summary html files. You + can then run the tool again at a later time with -summary and -genhtml to + create the timelines. + + -skiphtml (optional) + Run the test and capture the trace logs, but skip the timeline and summary + html generation. This can greatly speed up overall testing. You can then + copy the data to a faster host machine and run -summary -genhtml to + generate the timelines and summary. + + These are the relevant commands to use after testing is complete: + + -summary indir + Generate or regenerate the summary for a -multi test run. Creates three + files: summary.html, summary-issues.html, and summary-devices.html in the + current folder. summary.html is a table of tests with relevant info sorted + by kernel/host/mode, and links to the test html files. summary-issues.html + is a list of kernel issues found in dmesg from all the tests. + summary-devices.html is a list of devices and times from all the tests. + + -genhtml + Used with -summary to regenerate any missing html timelines from their + dmesg and ftrace logs. This will require a significant amount of time if + there are thousands of tests. + +Usage Examples +_______________ + + A multitest is initiated like this: + + %> sudo ./sleepgraph.py -m mem -rtcwake 10 -dev -gzip -multi 2000 0 + + or you can skip timeline generation in order to speed things up + + %> sudo ./sleepgraph.py -m mem -rtcwake 10 -dev -gzip -multi 2000 0 -skiphtml + + The tool will produce an output folder with all the test subfolders inside. + Each test subfolder contains the dmesg/ftrace logs and/or the html timeline + depending on whether you used the -skiphtml option. The root folder contains + the summary.html files. + + The summary for an existing multitest is generated like this: + + %> cd suspend-x2000-{date}-{time} + %> sleepgraph.py -summary . + + or if you need to generate the html timelines you can use -genhtml + + %> cd suspend-xN-{date}-{time} + %> sleepgraph.py -summary . -genhtml ------------------------------------------------------------------ | CONFIGURATION FILES | diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py index d3b99a1e92d6..2823cd3122f7 100755 --- a/tools/power/pm-graph/bootgraph.py +++ b/tools/power/pm-graph/bootgraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only # # Tool for analyzing boot timing diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg index 4f80ad7d7275..962e5768681c 100644 --- a/tools/power/pm-graph/config/custom-timeline-functions.cfg +++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg @@ -125,7 +125,7 @@ acpi_suspend_begin: suspend_console: acpi_pm_prepare: syscore_suspend: -arch_enable_nonboot_cpus_end: +arch_thaw_secondary_cpus_end: syscore_resume: acpi_pm_finish: resume_console: diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 43aee64316df..5126271de98a 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -74,8 +74,10 @@ after the test is complete. Switch the display to the requested mode for the test using the xset command. This helps maintain the consistency of test data for better comparison. .TP -\fB-skiphtml\fR -Run the test and capture the trace logs, but skip the timeline generation. +\fB-wifi\fR +If a wifi connection is available, check that it reconnects after resume. Include +the reconnect time in the total resume time calculation and treat wifi timeouts +as resume failures. .SS "advanced" .TP @@ -117,8 +119,24 @@ Include \fIt\fR ms delay before 1st suspend (default: 0 ms). Include \fIt\fR ms delay after last resume (default: 0 ms). .TP \fB-multi \fIn d\fR -Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will -be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}. +Used for endurance testing. If \fIn\fR is entirely numeric, it's treated as a count: +Execute \fIn\fR consecutive tests at \fId\fR second intervals. +If \fIn\fR is an integer followed by a "d", "h", or "m", it's treated as a duration: +Execute tests continuously over \fIn\fR days, hours, or minutes at \fId\fR second intervals. +The outputs will be created in a new subdirectory, for count: suspend-{date}-{time}-xN, +for duration: suspend-{date}-{time}-Nm. When the multitest run is done, the \fI-summary\fR +command is called automatically to create summary html files for all the data (unless you +use \fI-skiphtml\fR). \fI-skiphtml\fR will speed up the testing by not creating timelines +or summary html files. You can then run the tool again at a later time with \fI-summary\fR +and \fI-genhtml\fR to create the timelines. +.TP +\fB-maxfail \fIn\fR +Abort a -multi run after \fIn\fR consecutive fails. 0 means never abort (default = 0). +.TP +\fB-skiphtml\fR +Run the test and capture the trace logs, but skip the timeline generation. +You can generate the html timelines later with \fI-dmesg\fR & \fI-ftrace\fR, or +by running \fI-summary\fR and \fI-genhtml\fR. .SS "ftrace debug" .TP @@ -173,11 +191,20 @@ Set trace buffer size to N kilo-bytes (default: all of free memory up to 3GB) .SH COMMANDS .TP \fB-summary \fIindir\fR -Create a summary page of all tests in \fIindir\fR. Creates summary.html -in the current folder. The output page is a table of tests with -suspend and resume values sorted by suspend mode, host, and kernel. -Includes test averages by mode and links to the test html files. -Use -genhtml to include tests with missing html. +Create a set of summary pages for all tests in \fIindir\fR recursively. +Creates summary.html, summary-issues.html, and summary-devices.html in the current folder. +summary.html is a table of tests with relevant info sorted by kernel/host/mode, +and links to the test html files. It identifies the minimum, maximum, and median +suspend and resume times for you with highlights and links in the header. +summary-issues.html is a list of kernel issues found in dmesg from all the tests. +summary-devices.html is a list of devices and times from all the tests. + +Use \fI-genhtml\fR to regenerate any tests with missing html. +.TP +\fB-genhtml\fR +Used with \fI-summary\fR to regenerate any missing html timelines from their +dmesg and ftrace logs. This will require a significant amount of time if there +are thousands of tests. .TP \fB-modes\fR List available suspend modes. @@ -189,10 +216,7 @@ with any options you intend to use to see if they will work. \fB-fpdt\fR Print out the contents of the ACPI Firmware Performance Data Table. .TP -\fB-battery\fR -Print out battery status and current charge. -.TP -\fB-wifi\fR +\fB-wificheck\fR Print out wifi status and connection details. .TP \fB-xon/-xoff/-xstandby/-xsuspend\fR @@ -208,6 +232,9 @@ Print out system info extracted from BIOS. Reads /dev/mem directly instead of go \fB-devinfo\fR Print out the pm settings of all devices which support runtime suspend. .TP +\fB-cmdinfo\fR +Print out all the platform data collected from the system that makes it into the logs. +.TP \fB-flist\fR Print the list of ftrace functions currently being captured. Functions that are not available as symbols in the current kernel are shown in red. @@ -272,14 +299,20 @@ Run two suspends back to back, include a 500ms delay before, after, and in betwe .IP \f(CW$ sudo sleepgraph -m mem -rtcwake 15 -x2 -predelay 500 -x2delay 500 -postdelay 500\fR .PP +Execute a suspend using a custom command. +.IP +\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR +.PP + +.SS "endurance testing using -multi" +.PP Do a batch run of 10 freezes with 30 seconds delay between runs. .IP \f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 10 30\fR .PP -Execute a suspend using a custom command. +Do a batch run of freezes for 24 hours. .IP -\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR -.PP +\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 24h 0\fR .SS "adding callgraph data" Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger. diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index f7d1c1f62f86..602e64b68ba7 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-only # # Tool for analyzing suspend/resume timing @@ -58,7 +58,7 @@ import re import platform import signal import codecs -from datetime import datetime +from datetime import datetime, timedelta import struct import configparser import gzip @@ -81,12 +81,13 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.5' + version = '5.6' ansi = False rs = 0 display = '' gzip = False sync = False + wifi = False verbose = False testlog = True dmesglog = True @@ -97,7 +98,8 @@ class SystemValues: cgphase = '' cgtest = -1 cgskip = '' - multitest = {'run': False, 'count': 0, 'delay': 0} + maxfail = 0 + multitest = {'run': False, 'count': 1000000, 'delay': 0} max_graph_depth = 0 callloopmaxgap = 0.0001 callloopmaxlen = 0.005 @@ -148,7 +150,7 @@ class SystemValues: x2delay = 0 skiphtml = False usecallgraph = False - ftopfunc = 'suspend_devices_and_enter' + ftopfunc = 'pm_suspend' ftop = False usetraceevents = False usetracemarkers = True @@ -164,6 +166,8 @@ class SystemValues: predelay = 0 postdelay = 0 pmdebug = '' + tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f' + tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f' tracefuncs = { 'sys_sync': {}, 'ksys_sync': {}, @@ -183,16 +187,18 @@ class SystemValues: 'acpi_s2idle_sync': {}, 'acpi_s2idle_begin': {}, 'acpi_s2idle_prepare': {}, + 'acpi_s2idle_prepare_late': {}, 'acpi_s2idle_wake': {}, 'acpi_s2idle_wakeup': {}, 'acpi_s2idle_restore': {}, + 'acpi_s2idle_restore_early': {}, 'hibernate_preallocate_memory': {}, 'create_basic_memory_bitmaps': {}, 'swsusp_write': {}, 'suspend_console': {}, 'acpi_pm_prepare': {}, 'syscore_suspend': {}, - 'arch_enable_nonboot_cpus_end': {}, + 'arch_thaw_secondary_cpus_end': {}, 'syscore_resume': {}, 'acpi_pm_finish': {}, 'resume_console': {}, @@ -270,12 +276,23 @@ class SystemValues: 'intel_opregion_init': {}, 'intel_fbdev_set_suspend': {}, } + infocmds = [ + [0, 'kparams', 'cat', '/proc/cmdline'], + [0, 'mcelog', 'mcelog'], + [0, 'pcidevices', 'lspci', '-tv'], + [0, 'usbdevices', 'lsusb', '-t'], + [1, 'interrupts', 'cat', '/proc/interrupts'], + [1, 'wakeups', 'cat', '/sys/kernel/debug/wakeup_sources'], + [2, 'gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/*'], + [2, 'suspendstats', 'sh', '-c', 'grep -v invalid /sys/power/suspend_stats/*'], + [2, 'cpuidle', 'sh', '-c', 'grep -v invalid /sys/devices/system/cpu/cpu*/cpuidle/state*/s2idle/*'], + [2, 'battery', 'sh', '-c', 'grep -v invalid /sys/class/power_supply/*/*'], + ] cgblacklist = [] kprobes = dict() timeformat = '%.3f' cmdline = '%s %s' % \ (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:])) - kparams = '' sudouser = '' def __init__(self): self.archargs = 'args_'+platform.machine() @@ -295,6 +312,9 @@ class SystemValues: if os.getuid() == 0 and 'SUDO_USER' in os.environ and \ os.environ['SUDO_USER']: self.sudouser = os.environ['SUDO_USER'] + def resetlog(self): + self.logmsg = '' + self.platinfo = [] def vprint(self, msg): self.logmsg += msg+'\n' if self.verbose or msg.startswith('WARNING:'): @@ -304,11 +324,11 @@ class SystemValues: return signame = self.signames[signum] if signum in self.signames else 'UNKNOWN' msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno) - sysvals.outputResult({'error':msg}) + self.outputResult({'error':msg}) sys.exit(3) def signalHandlerInit(self): capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT', - 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'TSTP'] + 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM'] self.signames = dict() for i in capture: s = 'SIG'+i @@ -336,6 +356,8 @@ class SystemValues: self.outputResult({'error':msg}) sys.exit(1) return False + def usable(self, file): + return (os.path.exists(file) and os.path.getsize(file) > 0) def getExec(self, cmd): try: fp = Popen(['which', cmd], stdout=PIPE, stderr=PIPE).stdout @@ -389,12 +411,6 @@ class SystemValues: r = info['bios-release-date'] if 'bios-release-date' in info else '' self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | biosdate:%s | numcpu:%d | memsz:%d | memfr:%d' % \ (m, p, c, b, r, self.cpucount, self.memtotal, self.memfree) - try: - kcmd = open('/proc/cmdline', 'r').read().strip() - except: - kcmd = '' - if kcmd: - self.sysstamp += '\n# kparams | %s' % kcmd def printSystemInfo(self, fatal=False): self.rootCheck(True) out = dmidecode(self.mempath, fatal) @@ -441,6 +457,7 @@ class SystemValues: self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html' if not os.path.isdir(self.testdir): os.makedirs(self.testdir) + self.sudoUserchown(self.testdir) def getValueList(self, value): out = [] for i in value.split(','): @@ -486,7 +503,7 @@ class SystemValues: fp.close() self.dmesgstart = float(ktime) def getdmesg(self, testdata): - op = self.writeDatafileHeader(sysvals.dmesgfile, testdata) + op = self.writeDatafileHeader(self.dmesgfile, testdata) # store all new dmesg lines since initdmesg was called fp = Popen('dmesg', stdout=PIPE).stdout for line in fp: @@ -716,9 +733,10 @@ class SystemValues: if name == f: return True return False - def initFtrace(self): - self.printSystemInfo(False) - pprint('INITIALIZING FTRACE...') + def initFtrace(self, quiet=False): + if not quiet: + sysvals.printSystemInfo(False) + pprint('INITIALIZING FTRACE...') # turn trace off self.fsetVal('0', 'tracing_on') self.cleanupFtrace() @@ -746,7 +764,7 @@ class SystemValues: if tgtsize < 65536: tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus break - pprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus)) + self.vprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus)) # initialize the callgraph trace if(self.usecallgraph): # set trace type @@ -782,7 +800,8 @@ class SystemValues: if self.usedevsrc: for name in self.dev_tracefuncs: self.defaultKprobe(name, self.dev_tracefuncs[name]) - pprint('INITIALIZING KPROBES...') + if not quiet: + pprint('INITIALIZING KPROBES...') self.addKprobes(self.verbose) if(self.usetraceevents): # turn trace events on @@ -827,21 +846,10 @@ class SystemValues: fw = test['fw'] if(fw): fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) - if 'mcelog' in test: - fp.write('# mcelog %s\n' % test['mcelog']) if 'turbo' in test: fp.write('# turbostat %s\n' % test['turbo']) - if 'bat' in test: - (a1, c1), (a2, c2) = test['bat'] - fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2)) if 'wifi' in test: - wstr = [] - for wifi in test['wifi']: - tmp = [] - for key in sorted(wifi): - tmp.append('%s:%s' % (key, wifi[key])) - wstr.append('|'.join(tmp)) - fp.write('# wifi %s\n' % (','.join(wstr))) + fp.write('# wifi %s\n' % test['wifi']) if test['error'] or len(testdata) > 1: fp.write('# enter_sleep_error %s\n' % test['error']) return fp @@ -901,23 +909,7 @@ class SystemValues: def b64zip(self, data): out = base64.b64encode(codecs.encode(data.encode(), 'zlib')).decode() return out - def mcelog(self, clear=False): - cmd = self.getExec('mcelog') - if not cmd: - return '' - if clear: - call(cmd+' > /dev/null 2>&1', shell=True) - return '' - try: - fp = Popen([cmd], stdout=PIPE, stderr=PIPE).stdout - out = ascii(fp.read()).strip() - fp.close() - except: - return '' - if not out: - return '' - return self.b64zip(out) - def platforminfo(self): + def platforminfo(self, cmdafter): # add platform info on to a completed ftrace file if not os.path.exists(self.ftracefile): return False @@ -1001,31 +993,76 @@ class SystemValues: footer += '# platform-devinfo: %s\n' % self.b64zip(out) # add a line for each of these commands with their outputs - cmds = [ - ['pcidevices', 'lspci', '-tv'], - ['interrupts', 'cat', '/proc/interrupts'], - ['gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/gpe*'], - ] - for cargs in cmds: - name = cargs[0] - cmdline = ' '.join(cargs[1:]) - cmdpath = self.getExec(cargs[1]) - if not cmdpath: + for name, cmdline, info in cmdafter: + footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info)) + + with self.openlog(self.ftracefile, 'a') as fp: + fp.write(footer) + return True + def commonPrefix(self, list): + if len(list) < 2: + return '' + prefix = list[0] + for s in list[1:]: + while s[:len(prefix)] != prefix and prefix: + prefix = prefix[:len(prefix)-1] + if not prefix: + break + if '/' in prefix and prefix[-1] != '/': + prefix = prefix[0:prefix.rfind('/')+1] + return prefix + def dictify(self, text, format): + out = dict() + header = True if format == 1 else False + delim = ' ' if format == 1 else ':' + for line in text.split('\n'): + if header: + header, out['@'] = False, line + continue + line = line.strip() + if delim in line: + data = line.split(delim, 1) + num = re.search(r'[\d]+', data[1]) + if format == 2 and num: + out[data[0].strip()] = num.group() + else: + out[data[0].strip()] = data[1] + return out + def cmdinfo(self, begin, debug=False): + out = [] + if begin: + self.cmd1 = dict() + for cargs in self.infocmds: + delta, name = cargs[0], cargs[1] + cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2]) + if not cmdpath or (begin and not delta): continue - cmd = [cmdpath] + cargs[2:] try: - fp = Popen(cmd, stdout=PIPE, stderr=PIPE).stdout + fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout info = ascii(fp.read()).strip() fp.close() except: continue - if not info: - continue - footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info)) - - with self.openlog(self.ftracefile, 'a') as fp: - fp.write(footer) - return True + if not debug and begin: + self.cmd1[name] = self.dictify(info, delta) + elif not debug and delta and name in self.cmd1: + before, after = self.cmd1[name], self.dictify(info, delta) + dinfo = ('\t%s\n' % before['@']) if '@' in before else '' + prefix = self.commonPrefix(list(before.keys())) + for key in sorted(before): + if key in after and before[key] != after[key]: + title = key.replace(prefix, '') + if delta == 2: + dinfo += '\t%s : %s -> %s\n' % \ + (title, before[key].strip(), after[key].strip()) + else: + dinfo += '%10s (start) : %s\n%10s (after) : %s\n' % \ + (title, before[key], title, after[key]) + dinfo = '\tnothing changed' if not dinfo else dinfo.rstrip() + out.append((name, cmdline, dinfo)) + else: + out.append((name, cmdline, '\tnothing' if not info else info)) + return out def haveTurbostat(self): if not self.tstat: return False @@ -1035,8 +1072,8 @@ class SystemValues: fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr out = ascii(fp.read()).strip() fp.close() - if re.match('turbostat version [0-9\.]* .*', out): - sysvals.vprint(out) + if re.match('turbostat version .*', out): + self.vprint(out) return True return False def turbostat(self): @@ -1056,11 +1093,11 @@ class SystemValues: fp.close() if not keyline or not valline or len(keyline) != len(valline): errmsg = 'unrecognized turbostat output:\n'+rawout.strip() - sysvals.vprint(errmsg) - if not sysvals.verbose: + self.vprint(errmsg) + if not self.verbose: pprint(errmsg) return '' - if sysvals.verbose: + if self.verbose: pprint(rawout.strip()) out = [] for key in keyline: @@ -1068,30 +1105,36 @@ class SystemValues: val = valline[idx] out.append('%s=%s' % (key, val)) return '|'.join(out) - def checkWifi(self): - out = dict() - iwcmd, ifcmd = self.getExec('iwconfig'), self.getExec('ifconfig') - if not iwcmd or not ifcmd: - return out - fp = Popen(iwcmd, stdout=PIPE, stderr=PIPE).stdout - for line in fp: - m = re.match('(?P<dev>\S*) .* ESSID:(?P<ess>\S*)', ascii(line)) - if not m: + def wifiDetails(self, dev): + try: + info = open('/sys/class/net/%s/device/uevent' % dev, 'r').read().strip() + except: + return dev + vals = [dev] + for prop in info.split('\n'): + if prop.startswith('DRIVER=') or prop.startswith('PCI_ID='): + vals.append(prop.split('=')[-1]) + return ':'.join(vals) + def checkWifi(self, dev=''): + try: + w = open('/proc/net/wireless', 'r').read().strip() + except: + return '' + for line in reversed(w.split('\n')): + m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', w.split('\n')[-1]) + if not m or (dev and dev != m.group('dev')): continue - out['device'] = m.group('dev') - if '"' in m.group('ess'): - out['essid'] = m.group('ess').strip('"') - break - fp.close() - if 'device' in out: - fp = Popen([ifcmd, out['device']], stdout=PIPE, stderr=PIPE).stdout - for line in fp: - m = re.match('.* inet (?P<ip>[0-9\.]*)', ascii(line)) - if m: - out['ip'] = m.group('ip') - break - fp.close() - return out + return m.group('dev') + return '' + def pollWifi(self, dev, timeout=60): + start = time.time() + while (time.time() - start) < timeout: + w = self.checkWifi(dev) + if w: + return '%s reconnected %.2f' % \ + (self.wifiDetails(dev), max(0, time.time() - start)) + time.sleep(0.01) + return '%s timeout %d' % (self.wifiDetails(dev), timeout) def errorSummary(self, errinfo, msg): found = False for entry in errinfo: @@ -1113,8 +1156,9 @@ class SystemValues: arr[j] = arr[j]\ .replace('\\', '\\\\').replace(']', '\]').replace('[', '\[')\ .replace('.', '\.').replace('+', '\+').replace('*', '\*')\ - .replace('(', '\(').replace(')', '\)') - mstr = ' '.join(arr) + .replace('(', '\(').replace(')', '\)').replace('}', '\}')\ + .replace('{', '\{') + mstr = ' *'.join(arr) entry = { 'line': msg, 'match': mstr, @@ -1122,6 +1166,44 @@ class SystemValues: 'urls': {self.hostname: [self.htmlfile]} } errinfo.append(entry) + def multistat(self, start, idx, finish): + if 'time' in self.multitest: + id = '%d Duration=%dmin' % (idx+1, self.multitest['time']) + else: + id = '%d/%d' % (idx+1, self.multitest['count']) + t = time.time() + if 'start' not in self.multitest: + self.multitest['start'] = self.multitest['last'] = t + self.multitest['total'] = 0.0 + pprint('TEST (%s) START' % id) + return + dt = t - self.multitest['last'] + if not start: + if idx == 0 and self.multitest['delay'] > 0: + self.multitest['total'] += self.multitest['delay'] + pprint('TEST (%s) COMPLETE -- Duration %.1fs' % (id, dt)) + return + self.multitest['total'] += dt + self.multitest['last'] = t + avg = self.multitest['total'] / idx + if 'time' in self.multitest: + left = finish - datetime.now() + left -= timedelta(microseconds=left.microseconds) + else: + left = timedelta(seconds=((self.multitest['count'] - idx) * int(avg))) + pprint('TEST (%s) START - Avg Duration %.1fs, Time left %s' % \ + (id, avg, str(left))) + def multiinit(self, c, d): + sz, unit = 'count', 'm' + if c.endswith('d') or c.endswith('h') or c.endswith('m'): + sz, unit, c = 'time', c[-1], c[:-1] + self.multitest['run'] = True + self.multitest[sz] = getArgInt('multi: n d (exec count)', c, 1, 1000000, False) + self.multitest['delay'] = getArgInt('multi: n d (delay between tests)', d, 0, 3600, False) + if unit == 'd': + self.multitest[sz] *= 1440 + elif unit == 'h': + self.multitest[sz] *= 60 sysvals = SystemValues() switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0'] @@ -1210,25 +1292,30 @@ class Data: 'resume_complete': {'order': 9, 'color': '#FFFFCC'}, } errlist = { - 'HWERROR' : '.*\[ *Hardware Error *\].*', - 'FWBUG' : '.*\[ *Firmware Bug *\].*', - 'BUG' : '.*BUG.*', - 'ERROR' : '.*ERROR.*', - 'WARNING' : '.*WARNING.*', - 'IRQ' : '.*genirq: .*', - 'TASKFAIL': '.*Freezing of tasks *.*', - 'ACPI' : '.*ACPI *(?P<b>[A-Za-z]*) *Error[: ].*', - 'DEVFAIL' : '.* failed to (?P<b>[a-z]*) async: .*', - 'DISKFULL': '.*No space left on device.*', - 'USBERR' : '.*usb .*device .*, error [0-9-]*', - 'ATAERR' : ' *ata[0-9\.]*: .*failed.*', - 'MEIERR' : ' *mei.*: .*failed.*', - 'TPMERR' : '(?i) *tpm *tpm[0-9]*: .*error.*', + 'HWERROR' : r'.*\[ *Hardware Error *\].*', + 'FWBUG' : r'.*\[ *Firmware Bug *\].*', + 'BUG' : r'(?i).*\bBUG\b.*', + 'ERROR' : r'(?i).*\bERROR\b.*', + 'WARNING' : r'(?i).*\bWARNING\b.*', + 'FAULT' : r'(?i).*\bFAULT\b.*', + 'FAIL' : r'(?i).*\bFAILED\b.*', + 'INVALID' : r'(?i).*\bINVALID\b.*', + 'CRASH' : r'(?i).*\bCRASHED\b.*', + 'IRQ' : r'.*\bgenirq: .*', + 'TASKFAIL': r'.*Freezing of tasks *.*', + 'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*', + 'DISKFULL': r'.*\bNo space left on device.*', + 'USBERR' : r'.*usb .*device .*, error [0-9-]*', + 'ATAERR' : r' *ata[0-9\.]*: .*failed.*', + 'MEIERR' : r' *mei.*: .*failed.*', + 'TPMERR' : r'(?i) *tpm *tpm[0-9]*: .*error.*', } def __init__(self, num): idchar = 'abcdefghij' self.start = 0.0 # test start self.end = 0.0 # test end + self.hwstart = 0 # rtc test start + self.hwend = 0 # rtc test end self.tSuspended = 0.0 # low-level suspend start self.tResumed = 0.0 # low-level resume start self.tKernSus = 0.0 # kernel level suspend start @@ -1240,10 +1327,8 @@ class Data: self.stamp = 0 self.outfile = '' self.kerror = False - self.battery = 0 - self.wifi = 0 + self.wifi = dict() self.turbostat = 0 - self.mcelog = 0 self.enterfail = '' self.currphase = '' self.pstl = dict() # process timeline @@ -1308,6 +1393,8 @@ class Data: continue dir = 'suspend' if t < self.tSuspended else 'resume' msg = m.group('msg') + if re.match('capability: warning: .*', msg): + continue for err in self.errlist: if re.match(self.errlist[err], msg): list.append((msg, err, dir, t, i, i)) @@ -1316,17 +1403,26 @@ class Data: msglist = [] for msg, type, dir, t, idx1, idx2 in list: msglist.append(msg) - sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t)) self.errorinfo[dir].append((type, t, idx1, idx2)) if self.kerror: sysvals.dmesglog = True if len(self.dmesgtext) < 1 and sysvals.dmesgfile: lf.close() return msglist - def setStart(self, time): + def setStart(self, time, msg=''): self.start = time - def setEnd(self, time): + if msg: + try: + self.hwstart = datetime.strptime(msg, sysvals.tmstart) + except: + self.hwstart = 0 + def setEnd(self, time, msg=''): self.end = time + if msg: + try: + self.hwend = datetime.strptime(msg, sysvals.tmend) + except: + self.hwend = 0 def isTraceEventOutsideDeviceCalls(self, pid, time): for phase in self.sortedPhases(): list = self.dmesg[phase]['list'] @@ -1546,6 +1642,14 @@ class Data: self.trimTime(tS, tL, left) self.tLow.append('%.0f'%(tL*1000)) lp = phase + def getMemTime(self): + if not self.hwstart or not self.hwend: + return + stime = (self.tSuspended - self.start) * 1000000 + rtime = (self.end - self.tResumed) * 1000000 + hws = self.hwstart + timedelta(microseconds=stime) + hwr = self.hwend - timedelta(microseconds=rtime) + self.tLow.append('%.0f'%((hwr - hws).total_seconds() * 1000)) def getTimeValues(self): sktime = (self.tSuspended - self.tKernSus) * 1000 rktime = (self.tKernRes - self.tResumed) * 1000 @@ -1883,9 +1987,9 @@ class Data: c = self.addProcessUsageEvent(ps, tres) if c > 0: sysvals.vprint('%25s (res): %d' % (ps, c)) - def handleEndMarker(self, time): + def handleEndMarker(self, time, msg=''): dm = self.dmesg - self.setEnd(time) + self.setEnd(time, msg) self.initDevicegroups() # give suspend_prepare an end if needed if 'suspend_prepare' in dm and dm['suspend_prepare']['end'] < 0: @@ -2071,7 +2175,7 @@ class FTraceLine: if not self.fevent: return False if sysvals.usetracemarkers: - if(self.name == 'SUSPEND START'): + if(self.name.startswith('SUSPEND START')): return True return False else: @@ -2084,7 +2188,7 @@ class FTraceLine: if not self.fevent: return False if sysvals.usetracemarkers: - if(self.name == 'RESUME COMPLETE'): + if(self.name.startswith('RESUME COMPLETE')): return True return False else: @@ -2444,7 +2548,7 @@ class Timeline: def createHeader(self, sv, stamp): if(not stamp['time']): return - self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \ + self.html += '<div class="version"><a href="https://01.org/pm-graph">%s v%s</a></div>' \ % (sv.title, sv.version) if sv.logmsg and sv.testlog: self.html += '<button id="showtest" class="logbtn btnfmt">log</button>' @@ -2670,14 +2774,11 @@ class TestProps: stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' - batteryfmt = '^# battery (?P<a1>\w*) (?P<c1>\d*) (?P<a2>\w*) (?P<c2>\d*)' - wififmt = '^# wifi (?P<w>.*)' + wififmt = '^# wifi *(?P<d>\S*) *(?P<s>\S*) *(?P<t>[0-9\.]+).*' tstatfmt = '^# turbostat (?P<t>\S*)' - mcelogfmt = '^# mcelog (?P<m>\S*)' testerrfmt = '^# enter_sleep_error (?P<e>.*)' sysinfofmt = '^# sysinfo .*' cmdlinefmt = '^# command \| (?P<cmd>.*)' - kparamsfmt = '^# kparams \| (?P<kp>.*)' devpropfmt = '# Device Properties: .*' pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9]*): (?P<info>.*)' tracertypefmt = '# tracer: (?P<t>.*)' @@ -2695,11 +2796,8 @@ class TestProps: self.stamp = '' self.sysinfo = '' self.cmdline = '' - self.kparams = '' self.testerror = [] - self.mcelog = [] self.turbostat = [] - self.battery = [] self.wifi = [] self.fwdata = [] self.ftrace_line_fmt = self.ftrace_line_fmt_nop @@ -2721,21 +2819,12 @@ class TestProps: elif re.match(self.sysinfofmt, line): self.sysinfo = line return True - elif re.match(self.kparamsfmt, line): - self.kparams = line - return True elif re.match(self.cmdlinefmt, line): self.cmdline = line return True - elif re.match(self.mcelogfmt, line): - self.mcelog.append(line) - return True elif re.match(self.tstatfmt, line): self.turbostat.append(line) return True - elif re.match(self.batteryfmt, line): - self.battery.append(line) - return True elif re.match(self.wififmt, line): self.wifi.append(line) return True @@ -2749,6 +2838,8 @@ class TestProps: def parseStamp(self, data, sv): # global test data m = re.match(self.stampfmt, self.stamp) + if not self.stamp or not m: + doError('data does not include the expected stamp') data.stamp = {'time': '', 'host': '', 'mode': ''} dt = datetime(int(m.group('y'))+2000, int(m.group('m')), int(m.group('d')), int(m.group('H')), int(m.group('M')), @@ -2780,10 +2871,6 @@ class TestProps: m = re.match(self.cmdlinefmt, self.cmdline) if m: sv.cmdline = m.group('cmd') - if self.kparams: - m = re.match(self.kparamsfmt, self.kparams) - if m: - sv.kparams = m.group('kp') if not sv.stamp: sv.stamp = data.stamp # firmware data @@ -2793,26 +2880,18 @@ class TestProps: data.fwSuspend, data.fwResume = int(m.group('s')), int(m.group('r')) if(data.fwSuspend > 0 or data.fwResume > 0): data.fwValid = True - # mcelog data - if len(self.mcelog) > data.testnumber: - m = re.match(self.mcelogfmt, self.mcelog[data.testnumber]) - if m: - data.mcelog = sv.b64unzip(m.group('m')) # turbostat data if len(self.turbostat) > data.testnumber: m = re.match(self.tstatfmt, self.turbostat[data.testnumber]) if m: data.turbostat = m.group('t') - # battery data - if len(self.battery) > data.testnumber: - m = re.match(self.batteryfmt, self.battery[data.testnumber]) - if m: - data.battery = m.groups() # wifi data if len(self.wifi) > data.testnumber: m = re.match(self.wififmt, self.wifi[data.testnumber]) if m: - data.wifi = m.group('w') + data.wifi = {'dev': m.group('d'), 'stat': m.group('s'), + 'time': float(m.group('t'))} + data.stamp['wifi'] = m.group('d') # sleep mode enter errors if len(self.testerror) > data.testnumber: m = re.match(self.testerrfmt, self.testerror[data.testnumber]) @@ -3012,13 +3091,13 @@ def appendIncompleteTraceLog(testruns): if(t.startMarker()): data = testrun[testidx].data tp.parseStamp(data, sysvals) - data.setStart(t.time) + data.setStart(t.time, t.name) continue if(not data): continue # find the end of resume if(t.endMarker()): - data.setEnd(t.time) + data.setEnd(t.time, t.name) testidx += 1 if(testidx >= testcnt): break @@ -3081,7 +3160,7 @@ def parseTraceLog(live=False): doError('%s does not exist' % sysvals.ftracefile) if not live: sysvals.setupAllKprobes() - ksuscalls = ['pm_prepare_console'] + ksuscalls = ['ksys_sync', 'pm_prepare_console'] krescalls = ['pm_restore_console'] tracewatch = ['irq_wakeup'] if sysvals.usekprobes: @@ -3094,7 +3173,7 @@ def parseTraceLog(live=False): testruns = [] testdata = [] testrun = 0 - data = 0 + data, limbo = 0, True tf = sysvals.openlog(sysvals.ftracefile, 'r') phase = 'suspend_prepare' for line in tf: @@ -3141,16 +3220,16 @@ def parseTraceLog(live=False): continue # find the start of suspend if(t.startMarker()): - data = Data(len(testdata)) + data, limbo = Data(len(testdata)), False testdata.append(data) testrun = TestRun(data) testruns.append(testrun) tp.parseStamp(data, sysvals) - data.setStart(t.time) + data.setStart(t.time, t.name) data.first_suspend_prepare = True phase = data.setPhase('suspend_prepare', t.time, True) continue - if(not data): + if(not data or limbo): continue # process cpu exec line if t.type == 'tracing_mark_write': @@ -3167,14 +3246,16 @@ def parseTraceLog(live=False): continue # find the end of resume if(t.endMarker()): - data.handleEndMarker(t.time) + if data.tKernRes == 0: + data.tKernRes = t.time + data.handleEndMarker(t.time, t.name) if(not sysvals.usetracemarkers): # no trace markers? then quit and be sure to finish recording # the event we used to trigger resume end if('thaw_processes' in testrun.ttemp and len(testrun.ttemp['thaw_processes']) > 0): # if an entry exists, assume this is its end testrun.ttemp['thaw_processes'][-1]['end'] = t.time - break + limbo = True continue # trace event processing if(t.fevent): @@ -3197,7 +3278,7 @@ def parseTraceLog(live=False): # -- phase changes -- # start of kernel suspend if(re.match('suspend_enter\[.*', t.name)): - if(isbegin): + if(isbegin and data.tKernSus == 0): data.tKernSus = t.time continue # suspend_prepare start @@ -3225,7 +3306,7 @@ def parseTraceLog(live=False): elif(re.match('machine_suspend\[.*', t.name)): if(isbegin): lp = data.lastPhase() - if lp == 'resume_machine': + if lp.startswith('resume_machine'): data.dmesg[lp]['end'] = t.time phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True) data.setPhase(phase, t.time, False) @@ -3320,7 +3401,8 @@ def parseTraceLog(live=False): 'proc': m_proc, }) # start of kernel resume - if(phase == 'suspend_prepare' and kprobename in ksuscalls): + if(data.tKernSus == 0 and phase == 'suspend_prepare' \ + and kprobename in ksuscalls): data.tKernSus = t.time elif(t.freturn): if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1: @@ -3355,7 +3437,7 @@ def parseTraceLog(live=False): sysvals.vprint('WARNING: ftrace start marker is missing') if data and not data.devicegroups: sysvals.vprint('WARNING: ftrace end marker is missing') - data.handleEndMarker(t.time) + data.handleEndMarker(t.time, t.name) if sysvals.suspendmode == 'command': for test in testruns: @@ -3476,6 +3558,10 @@ def parseTraceLog(live=False): data.fwValid = False sysvals.vprint('WARNING: phase "%s" is missing!' % p) lp = p + if not terr and 'dev' in data.wifi and data.wifi['stat'] == 'timeout': + terr = '%s%s failed in wifi_resume <i>(%s %.0fs timeout)</i>' % \ + (sysvals.suspendmode, tn, data.wifi['dev'], data.wifi['time']) + error.append(terr) if not terr and data.enterfail: pprint('test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail)) terr = 'test%s failed to enter %s mode' % (tn, sysvals.suspendmode) @@ -3933,7 +4019,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title): tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [dict(), dict()] iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] num = 0 - useturbo = False + useturbo = usewifi = False lastmode = '' cnt = dict() for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])): @@ -3952,17 +4038,17 @@ def createHTMLSummarySimple(testruns, htmlfile, title): tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [dict(), dict()] iMin, iMed, iMax = [0, 0], [0, 0], [0, 0] num = 0 - pkgpc10 = syslpi = '' + pkgpc10 = syslpi = wifi = '' if 'pkgpc10' in data and 'syslpi' in data: - pkgpc10 = data['pkgpc10'] - syslpi = data['syslpi'] - useturbo = True + pkgpc10, syslpi, useturbo = data['pkgpc10'], data['syslpi'], True + if 'wifi' in data: + wifi, usewifi = data['wifi'], True res = data['result'] tVal = [float(data['suspend']), float(data['resume'])] list[mode]['data'].append([data['host'], data['kernel'], data['time'], tVal[0], tVal[1], data['url'], res, data['issues'], data['sus_worst'], data['sus_worsttime'], - data['res_worst'], data['res_worsttime'], pkgpc10, syslpi]) + data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi]) idx = len(list[mode]['data']) - 1 if res.startswith('fail in'): res = 'fail' @@ -4002,7 +4088,12 @@ def createHTMLSummarySimple(testruns, htmlfile, title): td = '\t<td>{0}</td>\n' tdh = '\t<td{1}>{0}</td>\n' tdlink = '\t<td><a href="{0}">html</a></td>\n' - colspan = '14' if useturbo else '12' + cols = 12 + if useturbo: + cols += 2 + if usewifi: + cols += 1 + colspan = '%d' % cols # table header html += '<table>\n<tr>\n' + th.format('#') +\ @@ -4013,6 +4104,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title): th.format('Worst Resume Device') + th.format('RD Time') if useturbo: html += th.format('PkgPC10') + th.format('SysLPI') + if usewifi: + html += th.format('Wifi') html += th.format('Detail')+'</tr>\n' # export list into html head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\ @@ -4076,6 +4169,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title): if useturbo: html += td.format(d[12]) # pkg_pc10 html += td.format(d[13]) # syslpi + if usewifi: + html += td.format(d[14]) # wifi html += tdlink.format(d[5]) if d[5] else td.format('') # url html += '</tr>\n' num += 1 @@ -4224,6 +4319,8 @@ def createHTML(testruns, testfail): kerror = True if(sysvals.suspendmode in ['freeze', 'standby']): data.trimFreezeTime(testruns[-1].tSuspended) + else: + data.getMemTime() # html function templates html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">{2}→</div>\n' @@ -4242,13 +4339,10 @@ def createHTML(testruns, testfail): '<td class="green">Execution Time: <b>{0} ms</b></td>'\ '<td class="yellow">Command: <b>{1}</b></td>'\ '</tr>\n</table>\n' - html_timegroups = '<table class="time2">\n<tr>'\ - '<td class="green" title="time from kernel enter_state({5}) to firmware mode [kernel time only]">{4}Kernel Suspend: {0} ms</td>'\ - '<td class="purple">{4}Firmware Suspend: {1} ms</td>'\ - '<td class="purple">{4}Firmware Resume: {2} ms</td>'\ - '<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\ - '</tr>\n</table>\n' html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n' + html_kdesc = '<td class="{3}" title="time spent in kernel execution">{0}Kernel {2}: {1} ms</td>' + html_fwdesc = '<td class="{3}" title="time spent in firmware">{0}Firmware {2}: {1} ms</td>' + html_wifdesc = '<td class="yellow" title="time for wifi to reconnect after resume complete ({2})">{0}Wifi Resume: {1}</td>' # html format variables scaleH = 20 @@ -4264,13 +4358,10 @@ def createHTML(testruns, testfail): # Generate the header for this timeline for data in testruns: tTotal = data.end - data.start - sktime, rktime = data.getTimeValues() if(tTotal == 0): doError('No timeline data') - if(len(data.tLow) > 0): - low_time = '+'.join(data.tLow) if sysvals.suspendmode == 'command': - run_time = '%.0f'%((data.end-data.start)*1000) + run_time = '%.0f' % (tTotal * 1000) if sysvals.testcommand: testdesc = sysvals.testcommand else: @@ -4279,43 +4370,55 @@ def createHTML(testruns, testfail): testdesc = ordinal(data.testnumber+1)+' '+testdesc thtml = html_timetotal3.format(run_time, testdesc) devtl.html += thtml - elif data.fwValid: - suspend_time = '%.0f'%(sktime + (data.fwSuspend/1000000.0)) - resume_time = '%.0f'%(rktime + (data.fwResume/1000000.0)) - testdesc1 = 'Total' - testdesc2 = '' - stitle = 'time from kernel enter_state(%s) to low-power mode [kernel & firmware time]' % sysvals.suspendmode - rtitle = 'time from low-power mode to return from kernel enter_state(%s) [firmware & kernel time]' % sysvals.suspendmode - if(len(testruns) > 1): - testdesc1 = testdesc2 = ordinal(data.testnumber+1) - testdesc2 += ' ' - if(len(data.tLow) == 0): - thtml = html_timetotal.format(suspend_time, \ - resume_time, testdesc1, stitle, rtitle) - else: - thtml = html_timetotal2.format(suspend_time, low_time, \ - resume_time, testdesc1, stitle, rtitle) - devtl.html += thtml + continue + # typical full suspend/resume header + stot, rtot = sktime, rktime = data.getTimeValues() + ssrc, rsrc, testdesc, testdesc2 = ['kernel'], ['kernel'], 'Kernel', '' + if data.fwValid: + stot += (data.fwSuspend/1000000.0) + rtot += (data.fwResume/1000000.0) + ssrc.append('firmware') + rsrc.append('firmware') + testdesc = 'Total' + if 'time' in data.wifi and data.wifi['stat'] != 'timeout': + rtot += data.end - data.tKernRes + (data.wifi['time'] * 1000.0) + rsrc.append('wifi') + testdesc = 'Total' + suspend_time, resume_time = '%.3f' % stot, '%.3f' % rtot + stitle = 'time from kernel suspend start to %s mode [%s time]' % \ + (sysvals.suspendmode, ' & '.join(ssrc)) + rtitle = 'time from %s mode to kernel resume complete [%s time]' % \ + (sysvals.suspendmode, ' & '.join(rsrc)) + if(len(testruns) > 1): + testdesc = testdesc2 = ordinal(data.testnumber+1) + testdesc2 += ' ' + if(len(data.tLow) == 0): + thtml = html_timetotal.format(suspend_time, \ + resume_time, testdesc, stitle, rtitle) + else: + low_time = '+'.join(data.tLow) + thtml = html_timetotal2.format(suspend_time, low_time, \ + resume_time, testdesc, stitle, rtitle) + devtl.html += thtml + if not data.fwValid and 'dev' not in data.wifi: + continue + # extra detail when the times come from multiple sources + thtml = '<table class="time2">\n<tr>' + thtml += html_kdesc.format(testdesc2, '%.3f'%sktime, 'Suspend', 'green') + if data.fwValid: sftime = '%.3f'%(data.fwSuspend / 1000000.0) rftime = '%.3f'%(data.fwResume / 1000000.0) - devtl.html += html_timegroups.format('%.3f'%sktime, \ - sftime, rftime, '%.3f'%rktime, testdesc2, sysvals.suspendmode) - else: - suspend_time = '%.3f' % sktime - resume_time = '%.3f' % rktime - testdesc = 'Kernel' - stitle = 'time from kernel enter_state(%s) to firmware mode [kernel time only]' % sysvals.suspendmode - rtitle = 'time from firmware mode to return from kernel enter_state(%s) [kernel time only]' % sysvals.suspendmode - if(len(testruns) > 1): - testdesc = ordinal(data.testnumber+1)+' '+testdesc - if(len(data.tLow) == 0): - thtml = html_timetotal.format(suspend_time, \ - resume_time, testdesc, stitle, rtitle) + thtml += html_fwdesc.format(testdesc2, sftime, 'Suspend', 'green') + thtml += html_fwdesc.format(testdesc2, rftime, 'Resume', 'yellow') + thtml += html_kdesc.format(testdesc2, '%.3f'%rktime, 'Resume', 'yellow') + if 'time' in data.wifi: + if data.wifi['stat'] != 'timeout': + wtime = '%.0f ms'%(data.end - data.tKernRes + (data.wifi['time'] * 1000.0)) else: - thtml = html_timetotal2.format(suspend_time, low_time, \ - resume_time, testdesc, stitle, rtitle) - devtl.html += thtml - + wtime = 'TIMEOUT' + thtml += html_wifdesc.format(testdesc2, wtime, data.wifi['dev']) + thtml += '</tr>\n</table>\n' + devtl.html += thtml if testfail: devtl.html += html_fail.format(testfail) @@ -5082,28 +5185,32 @@ def setRuntimeSuspend(before=True): # Description: # Execute system suspend through the sysfs interface, then copy the output # dmesg and ftrace files to the test output directory. -def executeSuspend(): +def executeSuspend(quiet=False): pm = ProcessMonitor() tp = sysvals.tpath - wifi = sysvals.checkWifi() + if sysvals.wifi: + wifi = sysvals.checkWifi() testdata = [] - battery = True if getBattery() else False # run these commands to prepare the system for suspend if sysvals.display: - pprint('SET DISPLAY TO %s' % sysvals.display.upper()) + if not quiet: + pprint('SET DISPLAY TO %s' % sysvals.display.upper()) displayControl(sysvals.display) time.sleep(1) if sysvals.sync: - pprint('SYNCING FILESYSTEMS') + if not quiet: + pprint('SYNCING FILESYSTEMS') call('sync', shell=True) # mark the start point in the kernel ring buffer just as we start sysvals.initdmesg() # start ftrace if(sysvals.usecallgraph or sysvals.usetraceevents): - pprint('START TRACING') + if not quiet: + pprint('START TRACING') sysvals.fsetVal('1', 'tracing_on') if sysvals.useprocmon: pm.start() + sysvals.cmdinfo(True) # execute however many s/r runs requested for count in range(1,sysvals.execcount+1): # x2delay in between test runs @@ -5119,15 +5226,14 @@ def executeSuspend(): pprint('SUSPEND START') else: pprint('SUSPEND START (press a key to resume)') - sysvals.mcelog(True) - bat1 = getBattery() if battery else False # set rtcwake if(sysvals.rtcwake): - pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime) + if not quiet: + pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime) sysvals.rtcWakeAlarmOn() # start of suspend trace marker if(sysvals.usecallgraph or sysvals.usetraceevents): - sysvals.fsetVal('SUSPEND START', 'trace_marker') + sysvals.fsetVal(datetime.now().strftime(sysvals.tmstart), 'trace_marker') # predelay delay if(count == 1 and sysvals.predelay > 0): sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker') @@ -5174,37 +5280,33 @@ def executeSuspend(): # return from suspend pprint('RESUME COMPLETE') if(sysvals.usecallgraph or sysvals.usetraceevents): - sysvals.fsetVal('RESUME COMPLETE', 'trace_marker') + sysvals.fsetVal(datetime.now().strftime(sysvals.tmend), 'trace_marker') + if sysvals.wifi and wifi: + tdata['wifi'] = sysvals.pollWifi(wifi) if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'): tdata['fw'] = getFPDT(False) - mcelog = sysvals.mcelog() - if mcelog: - tdata['mcelog'] = mcelog - bat2 = getBattery() if battery else False - if battery and bat1 and bat2: - tdata['bat'] = (bat1, bat2) - if 'device' in wifi and 'ip' in wifi: - tdata['wifi'] = (wifi, sysvals.checkWifi()) testdata.append(tdata) + cmdafter = sysvals.cmdinfo(False) # stop ftrace if(sysvals.usecallgraph or sysvals.usetraceevents): if sysvals.useprocmon: pm.stop() sysvals.fsetVal('0', 'tracing_on') # grab a copy of the dmesg output - pprint('CAPTURING DMESG') + if not quiet: + pprint('CAPTURING DMESG') sysvals.getdmesg(testdata) # grab a copy of the ftrace output if(sysvals.usecallgraph or sysvals.usetraceevents): - pprint('CAPTURING TRACE') + if not quiet: + pprint('CAPTURING TRACE') op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata) fp = open(tp+'trace', 'r') for line in fp: op.write(line) op.close() sysvals.fsetVal('', 'trace') - sysvals.platforminfo() - return testdata + sysvals.platforminfo(cmdafter) def readFile(file): if os.path.islink(file): @@ -5447,25 +5549,6 @@ def dmidecode(mempath, fatal=False): count += 1 return out -def getBattery(): - p, charge, bat = '/sys/class/power_supply', 0, {} - if not os.path.exists(p): - return False - for d in os.listdir(p): - type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower() - if type != 'battery': - continue - for v in ['status', 'energy_now', 'capacity_now']: - bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower() - break - if 'status' not in bat: - return False - ac = False if 'discharging' in bat['status'] else True - for v in ['energy_now', 'capacity_now']: - if v in bat and bat[v]: - charge = int(bat[v]) - return (ac, charge) - def displayControl(cmd): xset, ret = 'timeout 10 xset -d :0.0 {0}', 0 if sysvals.sudouser: @@ -5715,6 +5798,17 @@ def statusCheck(probecheck=False): status = 'rtcwake is not properly supported' pprint(' is rtcwake supported: %s' % res) + # check info commands + pprint(' optional commands this tool may use for info:') + no = sysvals.colorText('MISSING') + yes = sysvals.colorText('FOUND', 32) + for c in ['turbostat', 'mcelog', 'lspci', 'lsusb']: + if c == 'turbostat': + res = yes if sysvals.haveTurbostat() else no + else: + res = yes if sysvals.getExec(c) else no + pprint(' %s: %s' % (c, res)) + if not probecheck: return status @@ -5780,8 +5874,9 @@ def getArgFloat(name, args, min, max, main=True): doError(name+': value should be between %f and %f' % (min, max), True) return val -def processData(live=False): - pprint('PROCESSING DATA') +def processData(live=False, quiet=False): + if not quiet: + pprint('PROCESSING DATA') sysvals.vprint('usetraceevents=%s, usetracemarkers=%s, usekprobes=%s' % \ (sysvals.usetraceevents, sysvals.usetracemarkers, sysvals.usekprobes)) error = '' @@ -5796,20 +5891,17 @@ def processData(live=False): parseKernelLog(data) if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)): appendIncompleteTraceLog(testruns) + if not sysvals.stamp: + pprint('ERROR: data does not include the expected stamp') + return (testruns, {'error': 'timeline generation failed'}) shown = ['bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr', - 'memsz', 'mode', 'numcpu', 'plat', 'time'] + 'memsz', 'mode', 'numcpu', 'plat', 'time', 'wifi'] sysvals.vprint('System Info:') for key in sorted(sysvals.stamp): if key in shown: sysvals.vprint(' %-8s : %s' % (key.upper(), sysvals.stamp[key])) - if sysvals.kparams: - sysvals.vprint('Kparams:\n %s' % sysvals.kparams) sysvals.vprint('Command:\n %s' % sysvals.cmdline) for data in testruns: - if data.mcelog: - sysvals.vprint('MCELOG Data:') - for line in data.mcelog.split('\n'): - sysvals.vprint(' %s' % line) if data.turbostat: idx, s = 0, 'Turbostat:\n ' for val in data.turbostat.split('|'): @@ -5819,23 +5911,13 @@ def processData(live=False): s += '\n ' s += val + ' ' sysvals.vprint(s) - if data.battery: - a1, c1, a2, c2 = data.battery - s = 'Battery:\n Before - AC: %s, Charge: %d\n After - AC: %s, Charge: %d' % \ - (a1, int(c1), a2, int(c2)) - sysvals.vprint(s) - if data.wifi: - w = data.wifi.replace('|', ' ').split(',') - s = 'Wifi:\n Before %s\n After %s' % \ - (w[0], w[1]) - sysvals.vprint(s) data.printDetails() - if len(sysvals.platinfo) > 0: - sysvals.vprint('\nPlatform Info:') - for info in sysvals.platinfo: - sysvals.vprint(info[0]+' - '+info[1]) - sysvals.vprint(info[2]) - sysvals.vprint('') + if len(sysvals.platinfo) > 0: + sysvals.vprint('\nPlatform Info:') + for info in sysvals.platinfo: + sysvals.vprint('[%s - %s]' % (info[0], info[1])) + sysvals.vprint(info[2]) + sysvals.vprint('') if sysvals.cgdump: for data in testruns: data.debugPrint() @@ -5845,7 +5927,8 @@ def processData(live=False): return (testruns, {'error': 'timeline generation failed'}) sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile) createHTML(testruns, error) - pprint('DONE') + if not quiet: + pprint('DONE') data = testruns[0] stamp = data.stamp stamp['suspend'], stamp['resume'] = data.getTimeValues() @@ -5872,31 +5955,28 @@ def rerunTest(htmlfile=''): doError('a directory already exists with this name: %s' % sysvals.htmlfile) elif not os.access(sysvals.htmlfile, os.W_OK): doError('missing permission to write to %s' % sysvals.htmlfile) - testruns, stamp = processData(False) - sysvals.logmsg = '' + testruns, stamp = processData() + sysvals.resetlog() return stamp # Function: runTest # Description: # execute a suspend/resume, gather the logs, and generate the output -def runTest(n=0): +def runTest(n=0, quiet=False): # prepare for the test - sysvals.initFtrace() + sysvals.initFtrace(quiet) sysvals.initTestOutput('suspend') # execute the test - testdata = executeSuspend() + executeSuspend(quiet) sysvals.cleanupFtrace() if sysvals.skiphtml: + sysvals.outputResult({}, n) sysvals.sudoUserchown(sysvals.testdir) return - if not testdata[0]['error']: - testruns, stamp = processData(True) - for data in testruns: - del data - else: - stamp = testdata[0] - + testruns, stamp = processData(True, quiet) + for data in testruns: + del data sysvals.sudoUserchown(sysvals.testdir) sysvals.outputResult(stamp, n) if 'error' in stamp: @@ -5904,13 +5984,14 @@ def runTest(n=0): return 0 def find_in_html(html, start, end, firstonly=True): - n, out = 0, [] - while n < len(html): - m = re.search(start, html[n:]) + n, cnt, out = 0, len(html), [] + while n < cnt: + e = cnt if (n + 10000 > cnt or n == 0) else n + 10000 + m = re.search(start, html[n:e]) if not m: break i = m.end() - m = re.search(end, html[n+i:]) + m = re.search(end, html[n+i:e]) if not m: break j = m.start() @@ -5945,7 +6026,7 @@ def data_from_html(file, outpath, issues, fulldetail=False): tstr = dt.strftime('%Y/%m/%d %H:%M:%S') error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>') if error: - m = re.match('[a-z]* failed in (?P<p>[a-z0-9_]*) phase', error) + m = re.match('[a-z0-9]* failed in (?P<p>\S*).*', error) if m: result = 'fail in %s' % m.group('p') else: @@ -5974,6 +6055,9 @@ def data_from_html(file, outpath, issues, fulldetail=False): elist[err[0]] += 1 for i in elist: ilist.append('%sx%d' % (i, elist[i]) if elist[i] > 1 else i) + wifi = find_in_html(html, 'Wifi Resume: ', '</td>') + if wifi: + extra['wifi'] = wifi low = find_in_html(html, 'freeze time: <b>', ' ms</b>') if low and '|' in low: issue = 'FREEZEx%d' % len(low.split('|')) @@ -6048,13 +6132,15 @@ def genHtml(subdir, force=False): for dirname, dirnames, filenames in os.walk(subdir): sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = '' for filename in filenames: - if(re.match('.*_dmesg.txt', filename)): - sysvals.dmesgfile = os.path.join(dirname, filename) - elif(re.match('.*_ftrace.txt', filename)): - sysvals.ftracefile = os.path.join(dirname, filename) + file = os.path.join(dirname, filename) + if sysvals.usable(file): + if(re.match('.*_dmesg.txt', filename)): + sysvals.dmesgfile = file + elif(re.match('.*_ftrace.txt', filename)): + sysvals.ftracefile = file sysvals.setOutputFile() - if sysvals.ftracefile and sysvals.htmlfile and \ - (force or not os.path.exists(sysvals.htmlfile)): + if (sysvals.dmesgfile or sysvals.ftracefile) and sysvals.htmlfile and \ + (force or not sysvals.usable(sysvals.htmlfile)): pprint('FTRACE: %s' % sysvals.ftracefile) if sysvals.dmesgfile: pprint('DMESG : %s' % sysvals.dmesgfile) @@ -6169,9 +6255,9 @@ def configFromFile(file): sysvals.cgtest = getArgInt('cgtest', value, 0, 1, False) elif(option == 'cgphase'): d = Data(0) - if value not in d.sortedPhases(): + if value not in d.phasedef: doError('invalid phase --> (%s: %s), valid phases are %s'\ - % (option, value, d.sortedPhases()), True) + % (option, value, d.phasedef.keys()), True) sysvals.cgphase = value elif(option == 'fadd'): file = sysvals.configFile(value) @@ -6184,9 +6270,7 @@ def configFromFile(file): nums = value.split() if len(nums) != 2: doError('multi requires 2 integers (exec_count and delay)', True) - sysvals.multitest['run'] = True - sysvals.multitest['count'] = getArgInt('multi: n d (exec count)', nums[0], 2, 1000000, False) - sysvals.multitest['delay'] = getArgInt('multi: n d (delay between tests)', nums[1], 0, 3600, False) + sysvals.multiinit(nums[0], nums[1]) elif(option == 'devicefilter'): sysvals.setDeviceFilter(value) elif(option == 'expandcg'): @@ -6342,6 +6426,7 @@ def printHelp(): ' -srgap Add a visible gap in the timeline between sus/res (default: disabled)\n'\ ' -skiphtml Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\ ' -result fn Export a results table to a text file for parsing.\n'\ + ' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\ ' [testprep]\n'\ ' -sync Sync the filesystems before starting the test\n'\ ' -rs on/off Enable/disable runtime suspend for all devices, restore all after test\n'\ @@ -6356,8 +6441,10 @@ def printHelp(): ' -predelay t Include t ms delay before 1st suspend (default: 0 ms)\n'\ ' -postdelay t Include t ms delay after last resume (default: 0 ms)\n'\ ' -mindev ms Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)\n'\ - ' -multi n d Execute <n> consecutive tests at <d> seconds intervals. The outputs will\n'\ - ' be created in a new subdirectory with a summary page.\n'\ + ' -multi n d Execute <n> consecutive tests at <d> seconds intervals. If <n> is followed\n'\ + ' by a "d", "h", or "m" execute for <n> days, hours, or mins instead.\n'\ + ' The outputs will be created in a new subdirectory with a summary page.\n'\ + ' -maxfail n Abort a -multi run after n consecutive fails (default is 0 = never abort)\n'\ ' [debug]\n'\ ' -f Use ftrace to create device callgraphs (default: disabled)\n'\ ' -ftop Use ftrace on the top level call: "%s" (default: disabled)\n'\ @@ -6379,11 +6466,11 @@ def printHelp(): ' -modes List available suspend modes\n'\ ' -status Test to see if the system is enabled to run this tool\n'\ ' -fpdt Print out the contents of the ACPI Firmware Performance Data Table\n'\ - ' -battery Print out battery info (if available)\n'\ - ' -wifi Print out wifi connection info (if wireless-tools and device exists)\n'\ + ' -wificheck Print out wifi connection info\n'\ ' -x<mode> Test xset by toggling the given mode (on/off/standby/suspend)\n'\ ' -sysinfo Print out system info extracted from BIOS\n'\ ' -devinfo Print out the pm settings of all devices which support runtime suspend\n'\ + ' -cmdinfo Print out all the platform info collected before and after suspend/resume\n'\ ' -flist Print the list of functions currently being captured in ftrace\n'\ ' -flistall Print all functions capable of being captured in ftrace\n'\ ' -summary dir Create a summary of tests in this dir [-genhtml builds missing html]\n'\ @@ -6399,8 +6486,8 @@ if __name__ == '__main__': genhtml = False cmd = '' simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', - '-devinfo', '-status', '-battery', '-xon', '-xoff', '-xstandby', - '-xsuspend', '-xinit', '-xreset', '-xstat', '-wifi'] + '-devinfo', '-status', '-xon', '-xoff', '-xstandby', '-xsuspend', + '-xinit', '-xreset', '-xstat', '-wificheck', '-cmdinfo'] if '-f' in sys.argv: sysvals.cgskip = sysvals.configFile('cgskip.txt') # loop through the command line arguments @@ -6462,8 +6549,15 @@ if __name__ == '__main__': sysvals.usedevsrc = True elif(arg == '-sync'): sysvals.sync = True + elif(arg == '-wifi'): + sysvals.wifi = True elif(arg == '-gzip'): sysvals.gzip = True + elif(arg == '-info'): + try: + val = next(args) + except: + doError('-info requires one string argument', True) elif(arg == '-rs'): try: val = next(args) @@ -6555,10 +6649,14 @@ if __name__ == '__main__': sysvals.cgexp = True elif(arg == '-srgap'): sysvals.srgap = 5 + elif(arg == '-maxfail'): + sysvals.maxfail = getArgInt('-maxfail', args, 0, 1000000) elif(arg == '-multi'): - sysvals.multitest['run'] = True - sysvals.multitest['count'] = getArgInt('-multi n d (exec count)', args, 2, 1000000) - sysvals.multitest['delay'] = getArgInt('-multi n d (delay between tests)', args, 0, 3600) + try: + c, d = next(args), next(args) + except: + doError('-multi requires two values', True) + sysvals.multiinit(c, d) elif(arg == '-o'): try: val = next(args) @@ -6655,13 +6753,6 @@ if __name__ == '__main__': elif(cmd == 'fpdt'): if not getFPDT(True): ret = 1 - elif(cmd == 'battery'): - out = getBattery() - if out: - pprint('AC Connect : %s\nBattery Charge: %d' % out) - else: - pprint('no battery found') - ret = 1 elif(cmd == 'sysinfo'): sysvals.printSystemInfo(True) elif(cmd == 'devinfo'): @@ -6679,13 +6770,15 @@ if __name__ == '__main__': ret = displayControl(cmd[1:]) elif(cmd == 'xstat'): pprint('Display Status: %s' % displayControl('stat').upper()) - elif(cmd == 'wifi'): - out = sysvals.checkWifi() - if 'device' not in out: - pprint('WIFI interface not found') + elif(cmd == 'wificheck'): + dev = sysvals.checkWifi() + if dev: + print('%s is connected' % sysvals.wifiDetails(dev)) else: - for key in sorted(out): - pprint('%6s: %s' % (key.upper(), out[key])) + print('No wifi connection found') + elif(cmd == 'cmdinfo'): + for out in sysvals.cmdinfo(False, True): + print('[%s - %s]\n%s\n' % out) sys.exit(ret) # if instructed, re-analyze existing data files @@ -6720,24 +6813,38 @@ if __name__ == '__main__': setRuntimeSuspend(True) if sysvals.display: displayControl('init') - ret = 0 + failcnt, ret = 0, 0 if sysvals.multitest['run']: # run multiple tests in a separate subdirectory if not sysvals.outdir: - s = 'suspend-x%d' % sysvals.multitest['count'] - sysvals.outdir = datetime.now().strftime(s+'-%y%m%d-%H%M%S') + if 'time' in sysvals.multitest: + s = '-%dm' % sysvals.multitest['time'] + else: + s = '-x%d' % sysvals.multitest['count'] + sysvals.outdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S'+s) if not os.path.isdir(sysvals.outdir): os.makedirs(sysvals.outdir) + sysvals.sudoUserchown(sysvals.outdir) + finish = datetime.now() + if 'time' in sysvals.multitest: + finish += timedelta(minutes=sysvals.multitest['time']) for i in range(sysvals.multitest['count']): - if(i != 0): + sysvals.multistat(True, i, finish) + if i != 0 and sysvals.multitest['delay'] > 0: pprint('Waiting %d seconds...' % (sysvals.multitest['delay'])) time.sleep(sysvals.multitest['delay']) - pprint('TEST (%d/%d) START' % (i+1, sysvals.multitest['count'])) fmt = 'suspend-%y%m%d-%H%M%S' sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt)) - ret = runTest(i+1) - pprint('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count'])) - sysvals.logmsg = '' + ret = runTest(i+1, True) + failcnt = 0 if not ret else failcnt + 1 + if sysvals.maxfail > 0 and failcnt >= sysvals.maxfail: + pprint('Maximum fail count of %d reached, aborting multitest' % (sysvals.maxfail)) + break + time.sleep(5) + sysvals.resetlog() + sysvals.multistat(False, i, finish) + if 'time' in sysvals.multitest and datetime.now() >= finish: + break if not sysvals.skiphtml: runSummary(sysvals.outdir, False, False) sysvals.sudoUserchown(sysvals.outdir) diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index b73763489410..9f68f51ca652 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.3"; +static const char *version_str = "v1.4"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -25,7 +25,7 @@ static FILE *outf; static int cpu_model; static int cpu_stepping; -#define MAX_CPUS_IN_ONE_REQ 64 +#define MAX_CPUS_IN_ONE_REQ 256 static short max_target_cpus; static unsigned short target_cpus[MAX_CPUS_IN_ONE_REQ]; @@ -653,7 +653,7 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask, pkg_id = get_physical_package_id(cpu); for (i = 0; i < 64; ++i) { - if (core_mask & BIT(i)) { + if (core_mask & BIT_ULL(i)) { int j; for (j = 0; j < topo_max_cpus; ++j) { @@ -1169,6 +1169,7 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2, ctdp_level = &clx_n_pkg_dev.ctdp_level[0]; pbf_info = &ctdp_level->pbf_info; + clx_n_pkg_dev.processed = 1; isst_ctdp_display_information(cpu, outf, tdp_level, &clx_n_pkg_dev); free_cpu_set(ctdp_level->core_cpumask); free_cpu_set(pbf_info->core_cpumask); @@ -1631,6 +1632,8 @@ static int set_pbf_core_power(int cpu) static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; int ret; int status = *(int *)arg4; @@ -1646,6 +1649,24 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, goto disp_result; } + ret = isst_get_ctdp_levels(cpu, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + goto disp_result; + } + + ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level); + if (ret) { + isst_display_error_info_message(1, "Failed to get current level", 0, 0); + goto disp_result; + } + + if (!ctdp_level.pbf_support) { + isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, pkg_dev.current_level); + ret = -1; + goto disp_result; + } + if (auto_mode && status) { ret = set_pbf_core_power(cpu); if (ret) @@ -1772,10 +1793,30 @@ static void dump_fact_config(int arg) static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3, void *arg4) { + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; int ret; int status = *(int *)arg4; - if (auto_mode && status) { + ret = isst_get_ctdp_levels(cpu, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + goto disp_results; + } + + ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level); + if (ret) { + isst_display_error_info_message(1, "Failed to get current level", 0, 0); + goto disp_results; + } + + if (!ctdp_level.fact_support) { + isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, pkg_dev.current_level); + ret = -1; + goto disp_results; + } + + if (status) { ret = isst_pm_qos_config(cpu, 1, 1); if (ret) goto disp_results; diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c index 67c9b1139631..a7f4337c5777 100644 --- a/tools/power/x86/intel-speed-select/isst-core.c +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -912,16 +912,16 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type) return ret; if (ctdp_level.fact_enabled) { - debug_printf("Turbo-freq feature must be disabled first\n"); + isst_display_error_info_message(1, "Ignoring request, turbo-freq feature is still enabled", 0, 0); return -EINVAL; } ret = isst_write_pm_config(cpu, 0); if (ret) - isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0); + isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0); } else { ret = isst_write_pm_config(cpu, 1); if (ret) - isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error\n", 0, 0); + isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0); } ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0, diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 51dbaa5f02ec..e105fece47b6 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -316,21 +316,31 @@ void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix, { char header[256]; char value[256]; + int level = 1; + + if (out_format_is_json()) { + snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d", + get_physical_package_id(cpu), get_physical_die_id(cpu), + cpu); + format_and_print(outf, level++, header, NULL); + } else { + snprintf(header, sizeof(header), "package-%d", + get_physical_package_id(cpu)); + format_and_print(outf, level++, header, NULL); + snprintf(header, sizeof(header), "die-%d", + get_physical_die_id(cpu)); + format_and_print(outf, level++, header, NULL); + snprintf(header, sizeof(header), "cpu-%d", cpu); + format_and_print(outf, level++, header, NULL); + } - snprintf(header, sizeof(header), "package-%d", - get_physical_package_id(cpu)); - format_and_print(outf, 1, header, NULL); - snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu)); - format_and_print(outf, 2, header, NULL); - snprintf(header, sizeof(header), "cpu-%d", cpu); - format_and_print(outf, 3, header, NULL); if (str0 && !val) snprintf(value, sizeof(value), "%s", str0); else if (str1 && val) snprintf(value, sizeof(value), "%s", str1); else snprintf(value, sizeof(value), "%u", val); - format_and_print(outf, 4, prefix, value); + format_and_print(outf, level, prefix, value); format_and_print(outf, 1, NULL, NULL); } @@ -470,7 +480,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level, _isst_pbf_display_information(cpu, outf, tdp_level, &ctdp_level->pbf_info, - level + 1); + level + 2); continue; } diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 2e1afd856a78..094ba4589a9c 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -29,6 +29,7 @@ #include <sys/ioctl.h> #define BIT(x) (1 << (x)) +#define BIT_ULL(nr) (1ULL << (nr)) #define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h)))) #define GENMASK_ULL(h, l) \ (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h)))) diff --git a/tools/spi/Makefile b/tools/spi/Makefile index 2249a1546cc1..ada881afb489 100644 --- a/tools/spi/Makefile +++ b/tools/spi/Makefile @@ -52,7 +52,9 @@ $(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN) clean: rm -f $(ALL_PROGRAMS) rm -rf $(OUTPUT)include/ - find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete + find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.d' -delete + find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(bindir); \ diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 27967dd90f8f..eec23fa693bd 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -128,18 +128,22 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len) .bits_per_word = bits, }; - if (mode & SPI_TX_QUAD) + if (mode & SPI_TX_OCTAL) + tr.tx_nbits = 8; + else if (mode & SPI_TX_QUAD) tr.tx_nbits = 4; else if (mode & SPI_TX_DUAL) tr.tx_nbits = 2; - if (mode & SPI_RX_QUAD) + if (mode & SPI_RX_OCTAL) + tr.rx_nbits = 8; + else if (mode & SPI_RX_QUAD) tr.rx_nbits = 4; else if (mode & SPI_RX_DUAL) tr.rx_nbits = 2; if (!(mode & SPI_LOOP)) { - if (mode & (SPI_TX_QUAD | SPI_TX_DUAL)) + if (mode & (SPI_TX_OCTAL | SPI_TX_QUAD | SPI_TX_DUAL)) tr.rx_buf = 0; - else if (mode & (SPI_RX_QUAD | SPI_RX_DUAL)) + else if (mode & (SPI_RX_OCTAL | SPI_RX_QUAD | SPI_RX_DUAL)) tr.tx_buf = 0; } @@ -187,6 +191,7 @@ static void print_usage(const char *prog) " -R --ready slave pulls low to pause\n" " -2 --dual dual transfer\n" " -4 --quad quad transfer\n" + " -8 --octal octal transfer\n" " -S --size transfer size\n" " -I --iter iterations\n"); exit(1); @@ -213,13 +218,14 @@ static void parse_opts(int argc, char *argv[]) { "dual", 0, 0, '2' }, { "verbose", 0, 0, 'v' }, { "quad", 0, 0, '4' }, + { "octal", 0, 0, '8' }, { "size", 1, 0, 'S' }, { "iter", 1, 0, 'I' }, { NULL, 0, 0, 0 }, }; int c; - c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:vS:I:", + c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR248p:vS:I:", lopts, NULL); if (c == -1) @@ -280,6 +286,9 @@ static void parse_opts(int argc, char *argv[]) case '4': mode |= SPI_TX_QUAD; break; + case '8': + mode |= SPI_TX_OCTAL; + break; case 'S': transfer_size = atoi(optarg); break; @@ -295,6 +304,8 @@ static void parse_opts(int argc, char *argv[]) mode |= SPI_RX_DUAL; if (mode & SPI_TX_QUAD) mode |= SPI_RX_QUAD; + if (mode & SPI_TX_OCTAL) + mode |= SPI_RX_OCTAL; } } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index cde463af7071..c2642517e1d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -5,7 +5,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, const char *target_obj_file, int prog_cnt, - const char **prog_name) + const char **prog_name, + bool run_prog) { struct bpf_object *obj = NULL, *pkt_obj; int err, pkt_fd, i; @@ -18,7 +19,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + target_obj_file, err, errno)) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .attach_prog_fd = pkt_fd, @@ -33,7 +35,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, obj = bpf_object__open_file(obj_file, &opts); if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", - "failed to open fexit_bpf2bpf: %ld\n", + "failed to open %s: %ld\n", obj_file, PTR_ERR(obj))) goto close_prog; @@ -49,6 +51,10 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) goto close_prog; } + + if (!run_prog) + goto close_prog; + data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); if (CHECK(!data_map, "find_data_map", "data map not found\n")) goto close_prog; @@ -89,7 +95,7 @@ static void test_target_no_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o", "./test_pkt_md_access.o", ARRAY_SIZE(prog_name), - prog_name); + prog_name, true); } static void test_target_yes_callees(void) @@ -103,7 +109,7 @@ static void test_target_yes_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name); + prog_name, true); } static void test_func_replace(void) @@ -120,7 +126,18 @@ static void test_func_replace(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name); + prog_name, true); +} + +static void test_func_replace_verify(void) +{ + const char *prog_name[] = { + "freplace/do_bind", + }; + test_fexit_bpf2bpf_common("./freplace_connect4.o", + "./connect4_prog.o", + ARRAY_SIZE(prog_name), + prog_name, false); } void test_fexit_bpf2bpf(void) @@ -128,4 +145,5 @@ void test_fexit_bpf2bpf(void) test_target_no_callees(); test_target_yes_callees(); test_func_replace(); + test_func_replace_verify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 56d80adcf4bd..43d0b5578f46 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -19,7 +19,7 @@ void test_mmap(void) const size_t map_sz = roundup_page(sizeof(struct map_data)); const int zero = 0, one = 1, two = 2, far = 1500; const long page_size = sysconf(_SC_PAGE_SIZE); - int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd; + int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd; struct bpf_map *data_map, *bss_map; void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; struct test_mmap__bss *bss_data; @@ -37,6 +37,17 @@ void test_mmap(void) data_map = skel->maps.data_map; data_map_fd = bpf_map__fd(data_map); + rdmap_fd = bpf_map__fd(skel->maps.rdonly_map); + tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0); + if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) { + munmap(tmp1, 4096); + goto cleanup; + } + /* now double-check if it's mmap()'able at all */ + tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0); + if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno)) + goto cleanup; + /* get map's ID */ memset(&map_info, 0, map_info_sz); err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz); @@ -217,6 +228,14 @@ void test_mmap(void) munmap(tmp2, 4 * page_size); + /* map all 4 pages, but with pg_off=1 page, should fail */ + tmp1 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED, + data_map_fd, page_size /* initial page shift */); + if (CHECK(tmp1 != MAP_FAILED, "adv_mmap7", "unexpected success")) { + munmap(tmp1, 4 * page_size); + goto cleanup; + } + tmp1 = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0); if (CHECK(tmp1 == MAP_FAILED, "last_mmap", "failed %d\n", errno)) goto cleanup; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 75085119c5bb..ad3c498a8150 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -18,11 +18,25 @@ int _version SEC("version") = 1; +__attribute__ ((noinline)) +int do_bind(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = {}; + + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + + return 1; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { struct bpf_sock_tuple tuple = {}; - struct sockaddr_in sa; struct bpf_sock *sk; /* Verify that new destination is available. */ @@ -56,17 +70,7 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); - /* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); - - sa.sin_family = AF_INET; - sa.sin_port = bpf_htons(0); - sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); - - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - - return 1; + return do_bind(ctx) ? 1 : 0; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_connect4.c b/tools/testing/selftests/bpf/progs/freplace_connect4.c new file mode 100644 index 000000000000..a0ae84230699 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_connect4.c @@ -0,0 +1,18 @@ +#include <linux/stddef.h> +#include <linux/ipv6.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +SEC("freplace/do_bind") +int new_do_bind(struct bpf_sock_addr *ctx) +{ + struct sockaddr_in sa = {}; + + bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index 88b0566da13d..31538c9ed193 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -20,20 +20,12 @@ struct bpf_map_def SEC("maps") btf_map = { BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); -struct dummy_tracepoint_args { - unsigned long long pad; - struct sock *sock; -}; - __attribute__((noinline)) -int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(void) { struct ipv_counts *counts; int key = 0; - if (!arg->sock) - return 0; - counts = bpf_map_lookup_elem(&btf_map, &key); if (!counts) return 0; @@ -44,15 +36,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(void) { - return test_long_fname_2(arg); + return test_long_fname_2(); } SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +int _dummy_tracepoint(void *arg) { - return test_long_fname_1(arg); + return test_long_fname_1(); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index a924e53c8e9d..6c5560162746 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -28,20 +28,12 @@ struct { __type(value, struct ipv_counts); } btf_map SEC(".maps"); -struct dummy_tracepoint_args { - unsigned long long pad; - struct sock *sock; -}; - __attribute__((noinline)) -int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(void) { struct ipv_counts *counts; int key = 0; - if (!arg->sock) - return 0; - counts = bpf_map_lookup_elem(&btf_map, &key); if (!counts) return 0; @@ -57,15 +49,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(void) { - return test_long_fname_2(arg); + return test_long_fname_2(); } SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +int _dummy_tracepoint(void *arg) { - return test_long_fname_1(arg); + return test_long_fname_1(); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 983aedd1c072..506da7fd2da2 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -17,20 +17,12 @@ struct bpf_map_def SEC("maps") btf_map = { .max_entries = 4, }; -struct dummy_tracepoint_args { - unsigned long long pad; - struct sock *sock; -}; - __attribute__((noinline)) -int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(void) { struct ipv_counts *counts; int key = 0; - if (!arg->sock) - return 0; - counts = bpf_map_lookup_elem(&btf_map, &key); if (!counts) return 0; @@ -41,15 +33,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(void) { - return test_long_fname_2(arg); + return test_long_fname_2(); } SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +int _dummy_tracepoint(void *arg) { - return test_long_fname_1(arg); + return test_long_fname_1(); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c index 6239596cd14e..4eb42cff5fe9 100644 --- a/tools/testing/selftests/bpf/progs/test_mmap.c +++ b/tools/testing/selftests/bpf/progs/test_mmap.c @@ -9,6 +9,14 @@ char _license[] SEC("license") = "GPL"; struct { __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4096); + __uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG); + __type(key, __u32); + __type(value, char); +} rdonly_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 512 * 4); /* at least 4 pages of data */ __uint(map_flags, BPF_F_MMAPABLE); __type(key, __u32); diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index 56a50b25cd33..abb7344b531f 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -30,13 +30,13 @@ int prog3(struct bpf_raw_tracepoint_args *ctx) SEC("fentry/__set_task_comm") int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec) { - return !tsk; + return 0; } SEC("fexit/__set_task_comm") int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) { - return !tsk; + return 0; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 8da77cda5f4a..305fae8f80a9 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -2854,7 +2854,7 @@ static struct btf_raw_test raw_tests[] = { .value_type_id = 1, .max_entries = 4, .btf_load_err = true, - .err_str = "vlen != 0", + .err_str = "Invalid func linkage", }, { diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index a253a064e6e0..58f4aa593b1b 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -238,7 +238,7 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), /* r1 = [0x00, 0xff] */ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), @@ -253,10 +253,6 @@ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] */ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = 0 or - * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] - */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), /* error on OOB pointer computation */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), /* exit */ @@ -265,8 +261,10 @@ }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr = "value 72057594021150720 makes map_value pointer be out of bounds", - .result = REJECT + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", + .result_unpriv = REJECT, + .errstr = "value -4294967168 makes map_value pointer be out of bounds", + .result = REJECT, }, { "bounds check after truncation of boundary-crossing range (2)", @@ -276,7 +274,7 @@ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), /* r1 = [0x00, 0xff] */ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1), @@ -293,10 +291,6 @@ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff] */ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1), - /* r1 = 0 or - * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff] - */ - BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8), /* error on OOB pointer computation */ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), /* exit */ @@ -305,8 +299,10 @@ }, .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ - .errstr = "value 72057594021150720 makes map_value pointer be out of bounds", - .result = REJECT + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root", + .result_unpriv = REJECT, + .errstr = "value -4294967168 makes map_value pointer be out of bounds", + .result = REJECT, }, { "bounds check after wrapping 32-bit addition", @@ -539,3 +535,25 @@ }, .result = ACCEPT }, +{ + "assigning 32bit bounds to 64bit for wA = 0, wB = wA", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV32_IMM(BPF_REG_9, 0), + BPF_MOV32_REG(BPF_REG_2, BPF_REG_9), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_8, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_6, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 7276620ef242..8bfeb77c60bd 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -315,3 +315,43 @@ }, .result = ACCEPT, }, +{ + "store PTR_TO_STACK in R10 to array map using BPF_B", + .insns = { + /* Load pointer to map. */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* Copy R10 to R9. */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_10), + /* Pollute other registers with unaligned values. */ + BPF_MOV64_IMM(BPF_REG_2, -1), + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_MOV64_IMM(BPF_REG_4, -1), + BPF_MOV64_IMM(BPF_REG_5, -1), + BPF_MOV64_IMM(BPF_REG_6, -1), + BPF_MOV64_IMM(BPF_REG_7, -1), + BPF_MOV64_IMM(BPF_REG_8, -1), + /* Store both R9 and R10 with BPF_B and read back. */ + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0), + /* Should read back as same value. */ + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 42, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c index 7f6c232cd842..ed1c2cea1dea 100644 --- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c +++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c @@ -88,6 +88,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_48b = { 3 }, + .errstr_unpriv = "leaking pointer from stack off -8", .errstr = "R0 invalid mem access 'inv'", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index cd5e1f602ac9..909da9cdda97 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -351,6 +351,7 @@ static int test_alloc_errors(char *heap_name) } printf("Expected error checking passed\n"); + ret = 0; out: if (dmabuf_fd >= 0) close(dmabuf_fd); diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 24dd8ed48580..b025daea062d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -300,7 +300,7 @@ test_uc_aware() local i for ((i = 0; i < attempts; ++i)); do - if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then + if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then ((passes++)) fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index dbd1e014ba17..da49ad2761b5 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -264,6 +264,8 @@ trap_policer_test() local packets_t0 local packets_t1 + RET=0 + if [ $(devlink_trap_policers_num_get) -eq 0 ]; then check_err 1 "Failed to dump policers" fi @@ -328,6 +330,8 @@ trap_group_check_policer() trap_policer_bind_test() { + RET=0 + devlink trap group set $DEVLINK_DEV group l2_drops policer 1 check_err $? "Failed to bind a valid policer" if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index 11eee0b60040..d979ff14775a 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include <poll.h> #include <unistd.h> +#include <assert.h> #include <signal.h> #include <pthread.h> #include <sys/epoll.h> @@ -3136,4 +3137,149 @@ TEST(epoll59) close(ctx.sfd[0]); } +enum { + EPOLL60_EVENTS_NR = 10, +}; + +struct epoll60_ctx { + volatile int stopped; + int ready; + int waiters; + int epfd; + int evfd[EPOLL60_EVENTS_NR]; +}; + +static void *epoll60_wait_thread(void *ctx_) +{ + struct epoll60_ctx *ctx = ctx_; + struct epoll_event e; + sigset_t sigmask; + uint64_t v; + int ret; + + /* Block SIGUSR1 */ + sigemptyset(&sigmask); + sigaddset(&sigmask, SIGUSR1); + sigprocmask(SIG_SETMASK, &sigmask, NULL); + + /* Prepare empty mask for epoll_pwait() */ + sigemptyset(&sigmask); + + while (!ctx->stopped) { + /* Mark we are ready */ + __atomic_fetch_add(&ctx->ready, 1, __ATOMIC_ACQUIRE); + + /* Start when all are ready */ + while (__atomic_load_n(&ctx->ready, __ATOMIC_ACQUIRE) && + !ctx->stopped); + + /* Account this waiter */ + __atomic_fetch_add(&ctx->waiters, 1, __ATOMIC_ACQUIRE); + + ret = epoll_pwait(ctx->epfd, &e, 1, 2000, &sigmask); + if (ret != 1) { + /* We expect only signal delivery on stop */ + assert(ret < 0 && errno == EINTR && "Lost wakeup!\n"); + assert(ctx->stopped); + break; + } + + ret = read(e.data.fd, &v, sizeof(v)); + /* Since we are on ET mode, thus each thread gets its own fd. */ + assert(ret == sizeof(v)); + + __atomic_fetch_sub(&ctx->waiters, 1, __ATOMIC_RELEASE); + } + + return NULL; +} + +static inline unsigned long long msecs(void) +{ + struct timespec ts; + unsigned long long msecs; + + clock_gettime(CLOCK_REALTIME, &ts); + msecs = ts.tv_sec * 1000ull; + msecs += ts.tv_nsec / 1000000ull; + + return msecs; +} + +static inline int count_waiters(struct epoll60_ctx *ctx) +{ + return __atomic_load_n(&ctx->waiters, __ATOMIC_ACQUIRE); +} + +TEST(epoll60) +{ + struct epoll60_ctx ctx = { 0 }; + pthread_t waiters[ARRAY_SIZE(ctx.evfd)]; + struct epoll_event e; + int i, n, ret; + + signal(SIGUSR1, signal_handler); + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + + /* Create event fds */ + for (i = 0; i < ARRAY_SIZE(ctx.evfd); i++) { + ctx.evfd[i] = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd[i], 0); + + e.events = EPOLLIN | EPOLLET; + e.data.fd = ctx.evfd[i]; + ASSERT_EQ(epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd[i], &e), 0); + } + + /* Create waiter threads */ + for (i = 0; i < ARRAY_SIZE(waiters); i++) + ASSERT_EQ(pthread_create(&waiters[i], NULL, + epoll60_wait_thread, &ctx), 0); + + for (i = 0; i < 300; i++) { + uint64_t v = 1, ms; + + /* Wait for all to be ready */ + while (__atomic_load_n(&ctx.ready, __ATOMIC_ACQUIRE) != + ARRAY_SIZE(ctx.evfd)) + ; + + /* Steady, go */ + __atomic_fetch_sub(&ctx.ready, ARRAY_SIZE(ctx.evfd), + __ATOMIC_ACQUIRE); + + /* Wait all have gone to kernel */ + while (count_waiters(&ctx) != ARRAY_SIZE(ctx.evfd)) + ; + + /* 1ms should be enough to schedule away */ + usleep(1000); + + /* Quickly signal all handles at once */ + for (n = 0; n < ARRAY_SIZE(ctx.evfd); n++) { + ret = write(ctx.evfd[n], &v, sizeof(v)); + ASSERT_EQ(ret, sizeof(v)); + } + + /* Busy loop for 1s and wait for all waiters to wake up */ + ms = msecs(); + while (count_waiters(&ctx) && msecs() < ms + 1000) + ; + + ASSERT_EQ(count_waiters(&ctx), 0); + } + ctx.stopped = 1; + /* Stop waiters */ + for (i = 0; i < ARRAY_SIZE(waiters); i++) + ret = pthread_kill(waiters[i], SIGUSR1); + for (i = 0; i < ARRAY_SIZE(waiters); i++) + pthread_join(waiters[i], NULL); + + for (i = 0; i < ARRAY_SIZE(waiters); i++) + close(ctx.evfd[i]); + close(ctx.epfd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config index c2c8de4fafff..e59d985eeff0 100644 --- a/tools/testing/selftests/ftrace/config +++ b/tools/testing/selftests/ftrace/config @@ -11,5 +11,6 @@ CONFIG_PREEMPTIRQ_DELAY_TEST=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_SAMPLES=y +CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_KALLSYMS_ALL=y diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 063ecb290a5a..a4605b5ee66d 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -17,6 +17,7 @@ echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" echo " -vvv Alias of -v -v -v (Show all commands immediately)" echo " --fail-unsupported Treat UNSUPPORTED as a failure" +echo " --fail-unresolved Treat UNRESOLVED as a failure" echo " -d|--debug Debug mode (trace all shell commands)" echo " -l|--logdir <dir> Save logs on the <dir>" echo " If <dir> is -, all logs output in console only" @@ -29,8 +30,25 @@ err_ret=1 # kselftest skip code is 4 err_skip=4 +# cgroup RT scheduling prevents chrt commands from succeeding, which +# induces failures in test wakeup tests. Disable for the duration of +# the tests. + +readonly sched_rt_runtime=/proc/sys/kernel/sched_rt_runtime_us + +sched_rt_runtime_orig=$(cat $sched_rt_runtime) + +setup() { + echo -1 > $sched_rt_runtime +} + +cleanup() { + echo $sched_rt_runtime_orig > $sched_rt_runtime +} + errexit() { # message echo "Error: $1" 1>&2 + cleanup exit $err_ret } @@ -39,6 +57,8 @@ if [ `id -u` -ne 0 ]; then errexit "this must be run by root user" fi +setup + # Utilities absdir() { # file_path (cd `dirname $1`; pwd) @@ -93,6 +113,10 @@ parse_opts() { # opts UNSUPPORTED_RESULT=1 shift 1 ;; + --fail-unresolved) + UNRESOLVED_RESULT=1 + shift 1 + ;; --logdir|-l) LOG_DIR=$2 shift 2 @@ -157,6 +181,7 @@ KEEP_LOG=0 DEBUG=0 VERBOSE=0 UNSUPPORTED_RESULT=0 +UNRESOLVED_RESULT=0 STOP_FAILURE=0 # Parse command-line options parse_opts $* @@ -235,6 +260,7 @@ TOTAL_RESULT=0 INSTANCE= CASENO=0 + testcase() { # testfile CASENO=$((CASENO+1)) desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:` @@ -260,7 +286,7 @@ eval_result() { # sigval $UNRESOLVED) prlog " [${color_blue}UNRESOLVED${color_reset}]" UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO" - return 1 # this is a kind of bug.. something happened. + return $UNRESOLVED_RESULT # depends on use case ;; $UNTESTED) prlog " [${color_blue}UNTESTED${color_reset}]" @@ -273,7 +299,7 @@ eval_result() { # sigval return $UNSUPPORTED_RESULT # depends on use case ;; $XFAIL) - prlog " [${color_red}XFAIL${color_reset}]" + prlog " [${color_green}XFAIL${color_reset}]" XFAILED_CASES="$XFAILED_CASES $CASENO" return 0 ;; @@ -406,5 +432,7 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w` prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w` prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w` +cleanup + # if no error, return 0 exit $TOTAL_RESULT diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index aefab0c66d54..f59853857ad2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -10,10 +10,7 @@ if ! grep -q function_graph available_tracers; then exit_unsupported fi -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is dynamic ftrace not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter do_reset() { if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index c8a5209f2119..d610f47edd90 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -9,6 +9,8 @@ if ! grep -q function_graph available_tracers; then exit_unsupported fi +check_filter_file set_ftrace_filter + fail() { # msg echo $1 exit_fail diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index f4e92afab14b..28936f434ee5 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -9,6 +9,8 @@ if ! grep -q function available_tracers; then exit_unsupported fi +check_filter_file set_ftrace_filter + disable_tracing clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index 8aa46a2ea133..71db68a7975f 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -15,10 +15,7 @@ if [ ! -f set_ftrace_notrace_pid ]; then exit_unsupported fi -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is function tracer not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter do_function_fork=1 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index f2ee1e889e13..d58403c4b7cd 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -16,10 +16,7 @@ if [ ! -f set_ftrace_pid ]; then exit_unsupported fi -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is function tracer not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter do_function_fork=1 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 1a52f2883fe0..b2aff786c1a2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -3,7 +3,7 @@ # description: ftrace - stacktrace filter command # flags: instance -[ ! -f set_ftrace_filter ] && exit_unsupported +check_filter_file set_ftrace_filter echo _do_fork:stacktrace >> set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index ca2ffd7957f9..e9b1fd534e96 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -11,10 +11,7 @@ # # The triggers are set within the set_ftrace_filter file -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is dynamic ftrace not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter do_reset() { reset_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc index 9330c873f9fe..1a4b4a442d33 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: ftrace - function trace on module -[ ! -f set_ftrace_filter ] && exit_unsupported +check_filter_file set_ftrace_filter : "mod: allows to filter a non exist function" echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index dfbae637c60c..a3dadb6b93b4 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -18,10 +18,7 @@ if ! grep -q function_graph available_tracers; then exit_unsupported; fi -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is dynamic ftrace not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter if [ ! -f function_profile_enabled ]; then echo "function_profile_enabled not found, function profiling enabled?" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 51f6e6146bd9..70bad441fa7d 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -10,10 +10,7 @@ # # The triggers are set within the set_ftrace_filter file -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is dynamic ftrace not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter fail() { # mesg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc index b414f0e3c646..51e9e80bc0e6 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc @@ -8,6 +8,8 @@ if [ ! -f stack_trace ]; then exit_unsupported fi +check_filter_file stack_trace_filter + echo > stack_trace_filter echo 0 > stack_max_size echo 1 > /proc/sys/kernel/stack_tracer_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc index 1947387fe976..3ed173f2944a 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc @@ -11,10 +11,7 @@ # # The triggers are set within the set_ftrace_filter file -if [ ! -f set_ftrace_filter ]; then - echo "set_ftrace_filter not found? Is dynamic ftrace not set?" - exit_unsupported -fi +check_filter_file set_ftrace_filter fail() { # mesg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5d4550591ff9..61a3c7e2634d 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -1,3 +1,9 @@ +check_filter_file() { # check filter file introduced by dynamic ftrace + if [ ! -f "$1" ]; then + echo "$1 not found? Is dynamic ftrace not set?" + exit_unsupported + fi +} clear_trace() { # reset trace output echo > trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 1bcb67dcae26..81490ecaaa92 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -38,7 +38,7 @@ for width in 64 32 16 8; do echo 0 > events/kprobes/testprobe/enable : "Confirm the arguments is recorded in given types correctly" - ARGS=`grep "testprobe" trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'` + ARGS=`grep "testprobe" trace | head -n 1 | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'` check_types $ARGS $width : "Clear event for next loop" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 7650a82db3f5..df5072815b87 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,6 +5,8 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable grep "function" available_tracers || exit_unsupported # this is configurable +check_filter_file set_ftrace_filter + # prepare echo nop > current_tracer echo _do_fork > set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc index cbd174334a48..2b82c80edf69 100644 --- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc +++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc @@ -17,7 +17,14 @@ unsup() { #msg exit_unsupported } -modprobe $MOD || unsup "$MOD module not available" +unres() { #msg + reset_tracer + rmmod $MOD || true + echo $1 + exit_unresolved +} + +modprobe $MOD || unres "$MOD module not available" rmmod $MOD grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled" diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 0bb80619db58..32bdc978a711 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,13 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 -MOUNT_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null) -MOUNT_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null) -ifeq ($(MOUNT_LDLIBS),) -MOUNT_LDLIBS := -lmount -I/usr/include/libmount +VAR_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null) +VAR_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS := -lmount -I/usr/include/libmount endif -CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(MOUNT_CFLAGS) -LDLIBS += $(MOUNT_LDLIBS) +CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(VAR_CFLAGS) +LDLIBS += $(VAR_LDLIBS) TEST_PROGS := gpio-mockup.sh TEST_FILES := gpio-mockup-sysfs.sh diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 7340fd6a9a9f..39f0fa2a8fd6 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE -LDLIBS := $(LDLIBS) -lm +LDLIBS += -lm uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index 4c156aeab6b8..5ec4d9e18806 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -137,7 +137,7 @@ int dump_queue(struct msgque_data *msgque) for (kern_id = 0; kern_id < 256; kern_id++) { ret = msgctl(kern_id, MSG_STAT, &ds); if (ret < 0) { - if (errno == -EINVAL) + if (errno == EINVAL) continue; printf("Failed to get stats for IPC queue with id %d\n", kern_id); diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index e84d901f8567..676b3a8b114d 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -33,7 +33,7 @@ tap_timeout() { # Make sure tests will time out if utility is available. if [ -x /usr/bin/timeout ] ; then - /usr/bin/timeout "$kselftest_timeout" "$1" + /usr/bin/timeout --foreground "$kselftest_timeout" "$1" else "$1" fi diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh new file mode 100755 index 000000000000..bbc04646346b --- /dev/null +++ b/tools/testing/selftests/kselftest_deps.sh @@ -0,0 +1,272 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# kselftest_deps.sh +# +# Checks for kselftest build dependencies on the build system. +# Copyright (c) 2020 Shuah Khan <skhan@linuxfoundation.org> +# +# + +usage() +{ + +echo -e "Usage: $0 -[p] <compiler> [test_name]\n" +echo -e "\tkselftest_deps.sh [-p] gcc" +echo -e "\tkselftest_deps.sh [-p] gcc vm" +echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc" +echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc vm\n" +echo "- Should be run in selftests directory in the kernel repo." +echo "- Checks if Kselftests can be built/cross-built on a system." +echo "- Parses all test/sub-test Makefile to find library dependencies." +echo "- Runs compile test on a trivial C file with LDLIBS specified" +echo " in the test Makefiles to identify missing library dependencies." +echo "- Prints suggested target list for a system filtering out tests" +echo " failed the build dependency check from the TARGETS in Selftests" +echo " main Makefile when optional -p is specified." +echo "- Prints pass/fail dependency check for each tests/sub-test." +echo "- Prints pass/fail targets and libraries." +echo "- Default: runs dependency checks on all tests." +echo "- Optional test name can be specified to check dependencies for it." +exit 1 + +} + +# Start main() +main() +{ + +base_dir=`pwd` +# Make sure we're in the selftests top-level directory. +if [ $(basename "$base_dir") != "selftests" ]; then + echo -e "\tPlease run $0 in" + echo -e "\ttools/testing/selftests directory ..." + exit 1 +fi + +print_targets=0 + +while getopts "p" arg; do + case $arg in + p) + print_targets=1 + shift;; + esac +done + +if [ $# -eq 0 ] +then + usage +fi + +# Compiler +CC=$1 + +tmp_file=$(mktemp).c +trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT +#echo $tmp_file + +pass=$(mktemp).out +trap "rm -f $pass" EXIT +#echo $pass + +fail=$(mktemp).out +trap "rm -f $fail" EXIT +#echo $fail + +# Generate tmp source fire for compile test +cat << "EOF" > $tmp_file +int main() +{ +} +EOF + +# Save results +total_cnt=0 +fail_trgts=() +fail_libs=() +fail_cnt=0 +pass_trgts=() +pass_libs=() +pass_cnt=0 + +# Get all TARGETS from selftests Makefile +targets=$(egrep "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2) + +# Single test case +if [ $# -eq 2 ] +then + test=$2/Makefile + + l1_test $test + l2_test $test + l3_test $test + + print_results $1 $2 + exit $? +fi + +# Level 1: LDLIBS set static. +# +# Find all LDLIBS set statically for all executables built by a Makefile +# and filter out VAR_LDLIBS to discard the following: +# gpio/Makefile:LDLIBS += $(VAR_LDLIBS) +# Append space at the end of the list to append more tests. + +l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \ + grep -v "VAR_LDLIBS" | awk -F: '{print $1}') + +# Level 2: LDLIBS set dynamically. +# +# Level 2 +# Some tests have multiple valid LDLIBS lines for individual sub-tests +# that need dependency checks. Find them and append them to the tests +# e.g: vm/Makefile:$(OUTPUT)/userfaultfd: LDLIBS += -lpthread +# Filter out VAR_LDLIBS to discard the following: +# memfd/Makefile:$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS) +# Append space at the end of the list to append more tests. + +l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \ + grep -v "VAR_LDLIBS" | awk -F: '{print $1}') + +# Level 3 +# gpio, memfd and others use pkg-config to find mount and fuse libs +# respectively and save it in VAR_LDLIBS. If pkg-config doesn't find +# any, VAR_LDLIBS set to default. +# Use the default value and filter out pkg-config for dependency check. +# e.g: +# gpio/Makefile +# VAR_LDLIBS := $(shell pkg-config --libs mount) 2>/dev/null) +# memfd/Makefile +# VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) + +l3_tests=$(grep -r --include=Makefile "^VAR_LDLIBS" | \ + grep -v "pkg-config" | awk -F: '{print $1}') + +#echo $l1_tests +#echo $l2_1_tests +#echo $l3_tests + +all_tests +print_results $1 $2 + +exit $? +} +# end main() + +all_tests() +{ + for test in $l1_tests; do + l1_test $test + done + + for test in $l2_tests; do + l2_test $test + done + + for test in $l3_tests; do + l3_test $test + done +} + +# Use same parsing used for l1_tests and pick libraries this time. +l1_test() +{ + test_libs=$(grep --include=Makefile "^LDLIBS" $test | \ + grep -v "VAR_LDLIBS" | \ + sed -e 's/\:/ /' | \ + sed -e 's/+/ /' | cut -d "=" -f 2) + + check_libs $test $test_libs +} + +# Use same parsing used for l2__tests and pick libraries this time. +l2_test() +{ + test_libs=$(grep --include=Makefile ": LDLIBS" $test | \ + grep -v "VAR_LDLIBS" | \ + sed -e 's/\:/ /' | sed -e 's/+/ /' | \ + cut -d "=" -f 2) + + check_libs $test $test_libs +} + +l3_test() +{ + test_libs=$(grep --include=Makefile "^VAR_LDLIBS" $test | \ + grep -v "pkg-config" | sed -e 's/\:/ /' | + sed -e 's/+/ /' | cut -d "=" -f 2) + + check_libs $test $test_libs +} + +check_libs() +{ + +if [[ ! -z "${test_libs// }" ]] +then + + #echo $test_libs + + for lib in $test_libs; do + + let total_cnt+=1 + $CC -o $tmp_file.bin $lib $tmp_file > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "FAIL: $test dependency check: $lib" >> $fail + let fail_cnt+=1 + fail_libs+="$lib " + fail_target=$(echo "$test" | cut -d "/" -f1) + fail_trgts+="$fail_target " + targets=$(echo "$targets" | grep -v "$fail_target") + else + echo "PASS: $test dependency check passed $lib" >> $pass + let pass_cnt+=1 + pass_libs+="$lib " + pass_trgts+="$(echo "$test" | cut -d "/" -f1) " + fi + + done +fi +} + +print_results() +{ + echo -e "========================================================"; + echo -e "Kselftest Dependency Check for [$0 $1 $2] results..." + + if [ $print_targets -ne 0 ] + then + echo -e "Suggested Selftest Targets for your configuration:" + echo -e "$targets"; + fi + + echo -e "========================================================"; + echo -e "Checked tests defining LDLIBS dependencies" + echo -e "--------------------------------------------------------"; + echo -e "Total tests with Dependencies:" + echo -e "$total_cnt Pass: $pass_cnt Fail: $fail_cnt"; + + if [ $pass_cnt -ne 0 ]; then + echo -e "--------------------------------------------------------"; + cat $pass + echo -e "--------------------------------------------------------"; + echo -e "Targets passed build dependency check on system:" + echo -e "$(echo "$pass_trgts" | xargs -n1 | sort -u | xargs)" + fi + + if [ $fail_cnt -ne 0 ]; then + echo -e "--------------------------------------------------------"; + cat $fail + echo -e "--------------------------------------------------------"; + echo -e "Targets failed build dependency check on system:" + echo -e "$(echo "$fail_trgts" | xargs -n1 | sort -u | xargs)" + echo -e "--------------------------------------------------------"; + echo -e "Missing libraries system" + echo -e "$(echo "$fail_libs" | xargs -n1 | sort -u | xargs)" + fi + + echo -e "--------------------------------------------------------"; + echo -e "========================================================"; +} + +main "$@" diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2902f6a78f8a..2bb8c81fc0b4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -705,7 +705,7 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) /* Sanity check handler execution environment. */ if (!t) { fprintf(TH_LOG_STREAM, - "no active test in SIGARLM handler!?\n"); + "no active test in SIGALRM handler!?\n"); abort(); } if (sig != SIGALRM || sig != info->si_signo) { @@ -731,7 +731,7 @@ void __wait_for_test(struct __test_metadata *t) if (sigaction(SIGALRM, &action, &saved_action)) { t->passed = 0; fprintf(TH_LOG_STREAM, - "%s: unable to install SIGARLM handler\n", + "%s: unable to install SIGALRM handler\n", t->name); return; } @@ -743,7 +743,7 @@ void __wait_for_test(struct __test_metadata *t) if (sigaction(SIGALRM, &saved_action, NULL)) { t->passed = 0; fprintf(TH_LOG_STREAM, - "%s: unable to uninstall SIGARLM handler\n", + "%s: unable to uninstall SIGALRM handler\n", t->name); return; } diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 712a2ddd2a27..42f4f49f2a48 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -5,8 +5,34 @@ all: top_srcdir = ../../../.. KSFT_KHDR_INSTALL := 1 + +# For cross-builds to work, UNAME_M has to map to ARCH and arch specific +# directories and targets in this Makefile. "uname -m" doesn't map to +# arch specific sub-directory names. +# +# UNAME_M variable to used to run the compiles pointing to the right arch +# directories and build the right targets for these supported architectures. +# +# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable. +# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable. +# +# x86_64 targets are named to include x86_64 as a suffix and directories +# for includes are in x86_64 sub-directory. s390x and aarch64 follow the +# same convention. "uname -m" doesn't result in the correct mapping for +# s390x and aarch64. +# +# No change necessary for x86_64 UNAME_M := $(shell uname -m) +# Set UNAME_M for arm64 compile/install to work +ifeq ($(ARCH),arm64) + UNAME_M := aarch64 +endif +# Set UNAME_M s390x compile/install to work +ifeq ($(ARCH),s390) + UNAME_M := s390x +endif + LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c @@ -28,6 +54,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test @@ -53,7 +80,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ @@ -84,6 +111,7 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) $(AR) crs $@ $^ +x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) all: $(STATIC_LIBS) $(TEST_GEN_PROGS): $(STATIC_LIBS) diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h index d8f4d6bfe05d..a034438b6266 100644 --- a/tools/testing/selftests/kvm/include/evmcs.h +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -219,8 +219,8 @@ struct hv_enlightened_vmcs { #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) -struct hv_enlightened_vmcs *current_evmcs; -struct hv_vp_assist_page *current_vp_assist; +extern struct hv_enlightened_vmcs *current_evmcs; +extern struct hv_vp_assist_page *current_vp_assist; int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a99b875f50d2..92e184a422ee 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -143,6 +143,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 8a3523d4434f..9622431069bc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1201,6 +1201,15 @@ void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) ret, errno); } +void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_guest_debug *debug) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug); + + TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret); +} + /* * VM VCPU Set MP State * diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 6f17f69394be..4ae104f6ce69 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -17,6 +17,9 @@ bool enable_evmcs; +struct hv_enlightened_vmcs *current_evmcs; +struct hv_vp_assist_page *current_vp_assist; + struct eptPageTableEntry { uint64_t readable:1; uint64_t writable:1; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c new file mode 100644 index 000000000000..8162c58a1234 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM guest debug register tests + * + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <stdio.h> +#include <string.h> +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 + +#define DR6_BD (1 << 13) +#define DR7_GD (1 << 13) + +/* For testing data access debug BP */ +uint32_t guest_value; + +extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start; + +static void guest_code(void) +{ + /* + * Software BP tests. + * + * NOTE: sw_bp need to be before the cmd here, because int3 is an + * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we + * capture it using the vcpu exception bitmap). + */ + asm volatile("sw_bp: int3"); + + /* Hardware instruction BP test */ + asm volatile("hw_bp: nop"); + + /* Hardware data BP test */ + asm volatile("mov $1234,%%rax;\n\t" + "mov %%rax,%0;\n\t write_data:" + : "=m" (guest_value) : : "rax"); + + /* Single step test, covers 2 basic instructions and 2 emulated */ + asm volatile("ss_start: " + "xor %%rax,%%rax\n\t" + "cpuid\n\t" + "movl $0x1a0,%%ecx\n\t" + "rdmsr\n\t" + : : : "rax", "ecx"); + + /* DR6.BD test */ + asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax"); + GUEST_DONE(); +} + +#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug)) +#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug) +#define CAST_TO_RIP(v) ((unsigned long long)&(v)) +#define SET_RIP(v) do { \ + vcpu_regs_get(vm, VCPU_ID, ®s); \ + regs.rip = (v); \ + vcpu_regs_set(vm, VCPU_ID, ®s); \ + } while (0) +#define MOVE_RIP(v) SET_RIP(regs.rip + (v)); + +int main(void) +{ + struct kvm_guest_debug debug; + unsigned long long target_dr6, target_rip; + struct kvm_regs regs; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + uint64_t cmd; + int i; + /* Instruction lengths starting at ss_start */ + int ss_size[4] = { + 3, /* xor */ + 2, /* cpuid */ + 5, /* mov */ + 2, /* rdmsr */ + }; + + if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) { + print_skip("KVM_CAP_SET_GUEST_DEBUG not supported"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + run = vcpu_state(vm, VCPU_ID); + + /* Test software BPs - int3 */ + CLEAR_DEBUG(); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; + APPLY_DEBUG(); + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == BP_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(sw_bp), + "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)", + run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, CAST_TO_RIP(sw_bp)); + MOVE_RIP(1); + + /* Test instruction HW BP over DR[0-3] */ + for (i = 0; i < 4; i++) { + CLEAR_DEBUG(); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp); + debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1)); + APPLY_DEBUG(); + vcpu_run(vm, VCPU_ID); + target_dr6 = 0xffff0ff0 | (1UL << i); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(hw_bp) && + run->debug.arch.dr6 == target_dr6, + "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + i, run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, CAST_TO_RIP(hw_bp), + run->debug.arch.dr6, target_dr6); + } + /* Skip "nop" */ + MOVE_RIP(1); + + /* Test data access HW BP over DR[0-3] */ + for (i = 0; i < 4; i++) { + CLEAR_DEBUG(); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + debug.arch.debugreg[i] = CAST_TO_RIP(guest_value); + debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) | + (0x000d0000UL << (4*i)); + APPLY_DEBUG(); + vcpu_run(vm, VCPU_ID); + target_dr6 = 0xffff0ff0 | (1UL << i); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(write_data) && + run->debug.arch.dr6 == target_dr6, + "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + i, run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, CAST_TO_RIP(write_data), + run->debug.arch.dr6, target_dr6); + /* Rollback the 4-bytes "mov" */ + MOVE_RIP(-7); + } + /* Skip the 4-bytes "mov" */ + MOVE_RIP(7); + + /* Test single step */ + target_rip = CAST_TO_RIP(ss_start); + target_dr6 = 0xffff4ff0ULL; + vcpu_regs_get(vm, VCPU_ID, ®s); + for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { + target_rip += ss_size[i]; + CLEAR_DEBUG(); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + debug.arch.debugreg[7] = 0x00000400; + APPLY_DEBUG(); + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == target_rip && + run->debug.arch.dr6 == target_dr6, + "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + i, run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, target_rip, run->debug.arch.dr6, + target_dr6); + } + + /* Finally test global disable */ + CLEAR_DEBUG(); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + debug.arch.debugreg[7] = 0x400 | DR7_GD; + APPLY_DEBUG(); + vcpu_run(vm, VCPU_ID); + target_dr6 = 0xffff0ff0 | DR6_BD; + TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && + run->debug.arch.exception == DB_VECTOR && + run->debug.arch.pc == CAST_TO_RIP(bd_start) && + run->debug.arch.dr6 == target_dr6, + "DR7.GD: exit %d exception %d rip 0x%llx " + "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", + run->exit_reason, run->debug.arch.exception, + run->debug.arch.pc, target_rip, run->debug.arch.dr6, + target_dr6); + + /* Disable all debug controls, run to the end */ + CLEAR_DEBUG(); + APPLY_DEBUG(); + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO"); + cmd = get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE"); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index dadf819148a4..ee64ff8df8f4 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -25,13 +25,13 @@ fi # Figure out which test to run from our script name. test=$(basename $0 .sh) # Look up details about the test from master list of LKDTM tests. -line=$(egrep '^#?'"$test"'\b' tests.txt) +line=$(grep -E '^#?'"$test"'\b' tests.txt) if [ -z "$line" ]; then echo "Skipped: missing test '$test' in tests.txt" exit $KSELFTEST_SKIP_TEST fi # Check that the test is known to LKDTM. -if ! egrep -q '^'"$test"'$' "$TRIGGER" ; then +if ! grep -E -q '^'"$test"'$' "$TRIGGER" ; then echo "Skipped: test '$test' missing in $TRIGGER!" exit $KSELFTEST_SKIP_TEST fi @@ -59,30 +59,32 @@ if [ -z "$expect" ]; then expect="call trace:" fi -# Clear out dmesg for output reporting -dmesg -c >/dev/null - # Prepare log for report checking -LOG=$(mktemp --tmpdir -t lkdtm-XXXXXX) +LOG=$(mktemp --tmpdir -t lkdtm-log-XXXXXX) +DMESG=$(mktemp --tmpdir -t lkdtm-dmesg-XXXXXX) cleanup() { - rm -f "$LOG" + rm -f "$LOG" "$DMESG" } trap cleanup EXIT +# Save existing dmesg so we can detect new content below +dmesg > "$DMESG" + # Most shells yell about signals and we're expecting the "cat" process # to usually be killed by the kernel. So we have to run it in a sub-shell # and silence errors. ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true # Record and dump the results -dmesg -c >"$LOG" +dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true + cat "$LOG" # Check for expected output -if egrep -qi "$expect" "$LOG" ; then +if grep -E -qi "$expect" "$LOG" ; then echo "$test: saw '$expect': ok" exit 0 else - if egrep -qi XFAIL: "$LOG" ; then + if grep -E -qi XFAIL: "$LOG" ; then echo "$test: saw 'XFAIL': [SKIP]" exit $KSELFTEST_SKIP_TEST else diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 0a15f9e23431..4da8b565fa32 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -4,14 +4,25 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ -TEST_GEN_PROGS := memfd_test fuse_test fuse_mnt +TEST_GEN_PROGS := memfd_test TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh +TEST_GEN_FILES := fuse_test fuse_mnt -fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) +VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse +endif + +VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS := -lfuse -pthread +endif + +fuse_mnt.o: CFLAGS += $(VAR_CFLAGS) include ../lib.mk -$(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs) +$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS) $(OUTPUT)/memfd_test: memfd_test.c common.c $(OUTPUT)/fuse_test: fuse_test.c common.c diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 796670ebc65b..6560ed796ac4 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -749,6 +749,29 @@ ipv4_fcnal_runtime() run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" log_test $? 0 "Ping - multipath" + run_cmd "$IP ro delete 172.16.101.1/32 nhid 122" + + # + # multiple default routes + # - tests fib_select_default + run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1" + run_cmd "$IP ro add default nhid 501" + run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20" + run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + log_test $? 0 "Ping - multiple default routes, nh first" + + # flip the order + run_cmd "$IP ro del default nhid 501" + run_cmd "$IP ro del default via 172.16.1.3 dev veth1 metric 20" + run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20" + run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1" + run_cmd "$IP ro add default nhid 501 metric 20" + run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" + log_test $? 0 "Ping - multiple default routes, nh second" + + run_cmd "$IP nexthop delete nhid 501" + run_cmd "$IP ro del default" + # # IPv4 with blackhole nexthops # diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index b7616704b55e..84205c3a55eb 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -618,16 +618,22 @@ fib_nexthop_test() fib_suppress_test() { + echo + echo "FIB rule with suppress_prefixlength" + setup + $IP link add dummy1 type dummy $IP link set dummy1 up $IP -6 route add default dev dummy1 $IP -6 rule add table main suppress_prefixlength 0 - ping -f -c 1000 -W 1 1234::1 || true + ping -f -c 1000 -W 1 1234::1 >/dev/null 2>&1 $IP -6 rule del table main suppress_prefixlength 0 $IP link del dummy1 # If we got here without crashing, we're good. - return 0 + log_test 0 0 "FIB rule suppress test" + + cleanup } ################################################################################ diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 9172746b6cf0..15f4f46ca3a9 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -30,7 +30,7 @@ ret=0 cleanup() { - rm -f $out + rm -f $err ip netns del $ns1 } diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 35505b31e5cc..4555f88252ba 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -165,9 +165,10 @@ void *child_thread(void *arg) socklen_t zc_len = sizeof(zc); int res; + memset(&zc, 0, sizeof(zc)); zc.address = (__u64)((unsigned long)addr); zc.length = chunk_size; - zc.recv_skip_hint = 0; + res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len); if (res == -1) @@ -281,12 +282,14 @@ static void setup_sockaddr(int domain, const char *str_addr, static void do_accept(int fdlisten) { pthread_attr_t attr; + int rcvlowat; pthread_attr_init(&attr); pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + rcvlowat = chunk_size; if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT, - &chunk_size, sizeof(chunk_size)) == -1) { + &rcvlowat, sizeof(rcvlowat)) == -1) { perror("setsockopt SO_RCVLOWAT"); } diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c index e0d86e1668c0..e3c772c6a7c7 100644 --- a/tools/testing/selftests/nsfs/pidns.c +++ b/tools/testing/selftests/nsfs/pidns.c @@ -27,7 +27,7 @@ #define __stack_aligned__ __attribute__((aligned(16))) struct cr_clone_arg { char stack[128] __stack_aligned__; - char stack_ptr[0]; + char stack_ptr[]; }; static int child(void *args) diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests index 1cef54458aff..2aa9a3852a84 100755 --- a/tools/testing/selftests/pstore/pstore_tests +++ b/tools/testing/selftests/pstore/pstore_tests @@ -10,7 +10,7 @@ . ./common_tests prlog -n "Checking pstore console is registered ... " -dmesg | grep -q "console \[pstore" +dmesg | grep -Eq "console \[(pstore|${backend})" show_result $? prlog -n "Checking /dev/pmsg0 exists ... " diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh new file mode 100755 index 000000000000..e5cc6b2f195e --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# If this was a KCSAN run, collapse the reports in the various console.log +# files onto pairs of functions. +# +# Usage: kcsan-collapse.sh resultsdir +# +# Copyright (C) 2020 Facebook, Inc. +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +if test -z "$TORTURE_KCONFIG_KCSAN_ARG" +then + exit 0 +fi +cat $1/*/console.log | + grep "BUG: KCSAN: " | + sed -e 's/^\[[^]]*] //' | + sort | + uniq -c | + sort -k1nr > $1/kcsan.sum diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 9d9a41625dd9..1706cd4466b4 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -41,7 +41,21 @@ else title="$title ($ngpsps/s)" fi echo $title $stopstate $fwdprog - nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` + nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | \ + awk -v sum=0 ' + { + for (i = 0; i <= NF; i++) { + sum += $i; + if ($i ~ /Batch:/) { + sum = 0; + i = i + 2; + } + } + } + + END { + print sum + }'` if test -z "$nclosecalls" then exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 0326f4a5ff9c..736f04749b90 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -70,6 +70,15 @@ do fi fi done + if test -f "$rd/kcsan.sum" + then + if test -s "$rd/kcsan.sum" + then + echo KCSAN summary in $rd/kcsan.sum + else + echo Clean KCSAN run in $rd + fi + fi done EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1 ret=$? diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index e0352304b98b..6ff611c630d1 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -44,30 +44,32 @@ then fi echo ' ---' `date`: Starting build echo ' ---' Kconfig fragment at: $config_template >> $resdir/log -touch $resdir/ConfigFragment.input $resdir/ConfigFragment -if test -r "$config_dir/CFcommon" -then - echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input - cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input - config_override.sh $config_dir/CFcommon $config_template > $T/Kc1 - grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment -else - cp $config_template $T/Kc1 -fi -echo " --- $config_template" >> $resdir/ConfigFragment.input -cat $config_template >> $resdir/ConfigFragment.input -grep '#CHECK#' $config_template >> $resdir/ConfigFragment -if test -n "$TORTURE_KCONFIG_ARG" -then - echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline - echo " --- --kconfig argument" >> $resdir/ConfigFragment.input - cat $T/cmdline >> $resdir/ConfigFragment.input - config_override.sh $T/Kc1 $T/cmdline > $T/Kc2 - # Note that "#CHECK#" is not permitted on commandline. -else - cp $T/Kc1 $T/Kc2 -fi -cat $T/Kc2 >> $resdir/ConfigFragment +touch $resdir/ConfigFragment.input + +# Combine additional Kconfig options into an existing set such that +# newer options win. The first argument is the Kconfig source ID, the +# second the to-be-updated file within $T, and the third and final the +# list of additional Kconfig options. Note that a $2.tmp file is +# created when doing the update. +config_override_param () { + if test -n "$3" + then + echo $3 | sed -e 's/^ *//' -e 's/ *$//' | tr -s " " "\012" > $T/Kconfig_args + echo " --- $1" >> $resdir/ConfigFragment.input + cat $T/Kconfig_args >> $resdir/ConfigFragment.input + config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp + mv $T/$2.tmp $T/$2 + # Note that "#CHECK#" is not permitted on commandline. + fi +} + +echo > $T/KcList +config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`" +config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`" +config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG" +config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG" +config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" +cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux @@ -80,7 +82,7 @@ then ln -s $base_resdir/.config $resdir # for kvm-recheck.sh # Arch-independent indicator touch $resdir/builtkernel -elif kvm-build.sh $T/Kc2 $resdir +elif kvm-build.sh $T/KcList $resdir then # Had to build a kernel for this test. QEMU="`identify_qemu vmlinux`" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 2315e2ec12d6..c279cf9cb010 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -31,6 +31,8 @@ TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" +TORTURE_KCONFIG_KASAN_ARG="" +TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" TORTURE_QEMU_MEM=512 TORTURE_SHUTDOWN_GRACE=180 @@ -133,6 +135,12 @@ do TORTURE_KCONFIG_ARG="$2" shift ;; + --kasan) + TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + ;; + --kcsan) + TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG + ;; --kmake-arg) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' TORTURE_KMAKE_ARG="$2" @@ -310,6 +318,8 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG +TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG +TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE @@ -464,6 +474,7 @@ echo echo echo " --- `date` Test summary:" echo Results directory: $resdir/$ds +kcsan-collapse.sh $resdir/$ds kvm-recheck.sh $resdir/$ds ___EOF___ diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index c3c1fb5a9e1f..f2b20db9e296 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -14,3 +14,6 @@ TINY02 TASKS01 TASKS02 TASKS03 +RUDE01 +TRACE01 +TRACE02 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 new file mode 100644 index 000000000000..bafe94cbd739 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 @@ -0,0 +1,10 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +#CHECK#CONFIG_PROVE_RCU=y +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot new file mode 100644 index 000000000000..9363708c9075 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot @@ -0,0 +1 @@ +rcutorture.torture_type=tasks-rude diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 new file mode 100644 index 000000000000..12e7661b86f5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 @@ -0,0 +1,11 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +#CHECK#CONFIG_PROVE_RCU=y +CONFIG_TASKS_TRACE_RCU_READ_MB=y +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot new file mode 100644 index 000000000000..9675ad632dcc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot @@ -0,0 +1 @@ +rcutorture.torture_type=tasks-tracing diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 new file mode 100644 index 000000000000..b69ed6673c41 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 @@ -0,0 +1,11 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +#CHECK#CONFIG_PROVE_RCU=n +CONFIG_TASKS_TRACE_RCU_READ_MB=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot new file mode 100644 index 000000000000..9675ad632dcc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot @@ -0,0 +1 @@ +rcutorture.torture_type=tasks-tracing diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 2debe7891aeb..7311f84a5876 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=100 +CONFIG_NR_CPUS=56 CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 89fb3e0b552e..c0aa46ce14f6 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2803,12 +2803,13 @@ TEST(syscall_restart) offsetof(struct seccomp_data, nr)), #ifdef __NR_sigreturn - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0), #endif - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), /* Allow __NR_write for easy logging. */ @@ -2895,7 +2896,8 @@ TEST(syscall_restart) ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); ASSERT_EQ(0x100, msg); - EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid)); + ret = get_syscall(_metadata, child_pid); + EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep); /* Might as well check siginfo for sanity while we're here. */ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json new file mode 100644 index 000000000000..1cda2e11b3ad --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -0,0 +1,21 @@ +[ + { + "id": "83be", + "name": "Create FQ-PIE with invalid number of flows", + "category": [ + "qdisc", + "fq_pie" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY" + ] + } +] diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index b630c7b5950a..8155c2ea7ccb 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -1,17 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) -self.flags = flags -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - - -if [ -f /dev/tpm0 ] ; then - python -m unittest -v tpm2_tests.SmokeTest - python -m unittest -v tpm2_tests.AsyncTest -else - exit $ksft_skip -fi +python -m unittest -v tpm2_tests.SmokeTest +python -m unittest -v tpm2_tests.AsyncTest CLEAR_CMD=$(which tpm2_clear) if [ -n $CLEAR_CMD ]; then diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh index 180b469c53b4..a6f5e346635e 100755 --- a/tools/testing/selftests/tpm2/test_space.sh +++ b/tools/testing/selftests/tpm2/test_space.sh @@ -1,11 +1,4 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -if [ -f /dev/tpmrm0 ] ; then - python -m unittest -v tpm2_tests.SpaceTest -else - exit $ksft_skip -fi +python -m unittest -v tpm2_tests.SpaceTest diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 0edb6d900e8d..4f1831e62ea5 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -6,6 +6,7 @@ map_populate thuge-gen compaction_test mlock2-tests +mremap_dontunmap on-fault-limit transhuge-stress userfaultfd @@ -16,3 +17,4 @@ gup_benchmark va_128TBswitch map_fixed_noreplace write_to_hugetlbfs +hmm-tests diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index d31db052dff6..c6eb5305a0f6 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test TEST_GEN_FILES += gup_benchmark +TEST_GEN_FILES += hmm-tests TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += map_hugetlb @@ -20,7 +21,7 @@ TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) +ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs @@ -33,6 +34,8 @@ TEST_FILES := test_vmalloc.sh KSFT_KHDR_INSTALL := 1 include ../lib.mk +$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread + $(OUTPUT)/userfaultfd: LDLIBS += -lpthread $(OUTPUT)/mlock-random-test: LDLIBS += -lcap diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 93b90a9b1eeb..3ba674b64fa9 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -1,3 +1,5 @@ CONFIG_SYSVIPC=y CONFIG_USERFAULTFD=y CONFIG_TEST_VMALLOC=m +CONFIG_DEVICE_PRIVATE=y +CONFIG_TEST_HMM=m diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c new file mode 100644 index 000000000000..79db22604019 --- /dev/null +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -0,0 +1,1359 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * HMM stands for Heterogeneous Memory Management, it is a helper layer inside + * the linux kernel to help device drivers mirror a process address space in + * the device. This allows the device to use the same address space which + * makes communication and data exchange a lot easier. + * + * This framework's sole purpose is to exercise various code paths inside + * the kernel to make sure that HMM performs as expected and to flush out any + * bugs. + */ + +#include "../kselftest_harness.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <strings.h> +#include <time.h> +#include <pthread.h> +#include <hugetlbfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/ioctl.h> + +/* + * This is a private UAPI to the kernel test module so it isn't exported + * in the usual include/uapi/... directory. + */ +#include "../../../../lib/test_hmm_uapi.h" + +struct hmm_buffer { + void *ptr; + void *mirror; + unsigned long size; + int fd; + uint64_t cpages; + uint64_t faults; +}; + +#define TWOMEG (1 << 21) +#define HMM_BUFFER_SIZE (1024 << 12) +#define HMM_PATH_MAX 64 +#define NTIMES 256 + +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) + +FIXTURE(hmm) +{ + int fd; + unsigned int page_size; + unsigned int page_shift; +}; + +FIXTURE(hmm2) +{ + int fd0; + int fd1; + unsigned int page_size; + unsigned int page_shift; +}; + +static int hmm_open(int unit) +{ + char pathname[HMM_PATH_MAX]; + int fd; + + snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit); + fd = open(pathname, O_RDWR, 0); + if (fd < 0) + fprintf(stderr, "could not open hmm dmirror driver (%s)\n", + pathname); + return fd; +} + +FIXTURE_SETUP(hmm) +{ + self->page_size = sysconf(_SC_PAGE_SIZE); + self->page_shift = ffs(self->page_size) - 1; + + self->fd = hmm_open(0); + ASSERT_GE(self->fd, 0); +} + +FIXTURE_SETUP(hmm2) +{ + self->page_size = sysconf(_SC_PAGE_SIZE); + self->page_shift = ffs(self->page_size) - 1; + + self->fd0 = hmm_open(0); + ASSERT_GE(self->fd0, 0); + self->fd1 = hmm_open(1); + ASSERT_GE(self->fd1, 0); +} + +FIXTURE_TEARDOWN(hmm) +{ + int ret = close(self->fd); + + ASSERT_EQ(ret, 0); + self->fd = -1; +} + +FIXTURE_TEARDOWN(hmm2) +{ + int ret = close(self->fd0); + + ASSERT_EQ(ret, 0); + self->fd0 = -1; + + ret = close(self->fd1); + ASSERT_EQ(ret, 0); + self->fd1 = -1; +} + +static int hmm_dmirror_cmd(int fd, + unsigned long request, + struct hmm_buffer *buffer, + unsigned long npages) +{ + struct hmm_dmirror_cmd cmd; + int ret; + + /* Simulate a device reading system memory. */ + cmd.addr = (__u64)buffer->ptr; + cmd.ptr = (__u64)buffer->mirror; + cmd.npages = npages; + + for (;;) { + ret = ioctl(fd, request, &cmd); + if (ret == 0) + break; + if (errno == EINTR) + continue; + return -errno; + } + buffer->cpages = cmd.cpages; + buffer->faults = cmd.faults; + + return 0; +} + +static void hmm_buffer_free(struct hmm_buffer *buffer) +{ + if (buffer == NULL) + return; + + if (buffer->ptr) + munmap(buffer->ptr, buffer->size); + free(buffer->mirror); + free(buffer); +} + +/* + * Create a temporary file that will be deleted on close. + */ +static int hmm_create_file(unsigned long size) +{ + char path[HMM_PATH_MAX]; + int fd; + + strcpy(path, "/tmp"); + fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600); + if (fd >= 0) { + int r; + + do { + r = ftruncate(fd, size); + } while (r == -1 && errno == EINTR); + if (!r) + return fd; + close(fd); + } + return -1; +} + +/* + * Return a random unsigned number. + */ +static unsigned int hmm_random(void) +{ + static int fd = -1; + unsigned int r; + + if (fd < 0) { + fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "%s:%d failed to open /dev/urandom\n", + __FILE__, __LINE__); + return ~0U; + } + } + read(fd, &r, sizeof(r)); + return r; +} + +static void hmm_nanosleep(unsigned int n) +{ + struct timespec t; + + t.tv_sec = 0; + t.tv_nsec = n; + nanosleep(&t, NULL); +} + +/* + * Simple NULL test of device open/close. + */ +TEST_F(hmm, open_close) +{ +} + +/* + * Read private anonymous memory. + */ +TEST_F(hmm, anon_read) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int val; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* + * Initialize buffer in system memory but leave the first two pages + * zero (pte_none and pfn_zero). + */ + i = 2 * self->page_size / sizeof(*ptr); + for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Set buffer permission to read-only. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Populate the CPU page table with a special zero page. */ + val = *(int *)(buffer->ptr + self->page_size); + ASSERT_EQ(val, 0); + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + ptr = buffer->mirror; + for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + for (; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Read private anonymous memory which has been protected with + * mprotect() PROT_NONE. + */ +TEST_F(hmm, anon_read_prot) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Initialize mirror buffer so we can verify it isn't written. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = -i; + + /* Protect buffer from reading. */ + ret = mprotect(buffer->ptr, size, PROT_NONE); + ASSERT_EQ(ret, 0); + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, -EFAULT); + + /* Allow CPU to read the buffer so we can check it. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + hmm_buffer_free(buffer); +} + +/* + * Write private anonymous memory. + */ +TEST_F(hmm, anon_write) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Write private anonymous memory which has been protected with + * mprotect() PROT_READ. + */ +TEST_F(hmm, anon_write_prot) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device reading a zero page of memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 1); + ASSERT_EQ(buffer->faults, 1); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, -EPERM); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], 0); + + /* Now allow writing and see that the zero page is replaced. */ + ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Check that a device writing an anonymous private mapping + * will copy-on-write if a child process inherits the mapping. + */ +TEST_F(hmm, anon_write_child) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + pid_t pid; + int child_fd; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer->ptr so we can tell if it is written. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = -i; + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (pid != 0) { + waitpid(pid, &ret, 0); + ASSERT_EQ(WIFEXITED(ret), 1); + + /* Check that the parent's buffer did not change. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + return; + } + + /* Check that we see the parent's values. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + /* The child process needs its own mirror to its own mm. */ + child_fd = hmm_open(0); + ASSERT_GE(child_fd, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + close(child_fd); + exit(0); +} + +/* + * Check that a device writing an anonymous shared mapping + * will not copy-on-write if a child process inherits the mapping. + */ +TEST_F(hmm, anon_write_child_shared) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + pid_t pid; + int child_fd; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer->ptr so we can tell if it is written. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = -i; + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (pid != 0) { + waitpid(pid, &ret, 0); + ASSERT_EQ(WIFEXITED(ret), 1); + + /* Check that the parent's buffer did change. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + return; + } + + /* Check that we see the parent's values. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + /* The child process needs its own mirror to its own mm. */ + child_fd = hmm_open(0); + ASSERT_GE(child_fd, 0); + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], -i); + + close(child_fd); + exit(0); +} + +/* + * Write private anonymous huge page. + */ +TEST_F(hmm, anon_write_huge) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + void *old_ptr; + void *map; + int *ptr; + int ret; + + size = 2 * TWOMEG; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + size = TWOMEG; + npages = size >> self->page_shift; + map = (void *)ALIGN((uintptr_t)buffer->ptr, size); + ret = madvise(map, size, MADV_HUGEPAGE); + ASSERT_EQ(ret, 0); + old_ptr = buffer->ptr; + buffer->ptr = map; + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + buffer->ptr = old_ptr; + hmm_buffer_free(buffer); +} + +/* + * Write huge TLBFS page. + */ +TEST_F(hmm, anon_write_hugetlbfs) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + long pagesizes[4]; + int n, idx; + + /* Skip test if we can't allocate a hugetlbfs page. */ + + n = gethugepagesizes(pagesizes, 4); + if (n <= 0) + return; + for (idx = 0; --n > 0; ) { + if (pagesizes[n] < pagesizes[idx]) + idx = n; + } + size = ALIGN(TWOMEG, pagesizes[idx]); + npages = size >> self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->ptr = get_hugepage_region(size, GHR_STRICT); + if (buffer->ptr == NULL) { + free(buffer); + return; + } + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + free_hugepage_region(buffer->ptr); + buffer->ptr = NULL; + hmm_buffer_free(buffer); +} + +/* + * Read mmap'ed file memory. + */ +TEST_F(hmm, file_read) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + ssize_t len; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Write initial contents of the file. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + len = pwrite(fd, buffer->mirror, size, 0); + ASSERT_EQ(len, size); + memset(buffer->mirror, 0, size); + + buffer->ptr = mmap(NULL, size, + PROT_READ, + MAP_SHARED, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Write mmap'ed file memory. + */ +TEST_F(hmm, file_write) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + int fd; + ssize_t len; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + fd = hmm_create_file(size); + ASSERT_GE(fd, 0); + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = fd; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_SHARED, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize data that the device will write to buffer->ptr. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Simulate a device writing system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device wrote. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Check that the device also wrote the file. */ + len = pread(fd, buffer->mirror, size, 0); + ASSERT_EQ(len, size); + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous memory to device private memory. + */ +TEST_F(hmm, migrate) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous memory to device private memory and fault it back to system + * memory. + */ +TEST_F(hmm, migrate_fault) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); +} + +/* + * Try to migrate various memory types to device private memory. + */ +TEST_F(hmm2, migrate_mixed) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int *ptr; + unsigned char *p; + int ret; + int val; + + npages = 6; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + p = buffer->ptr; + + /* Migrating a protected area should be an error. */ + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages); + ASSERT_EQ(ret, -EINVAL); + + /* Punch a hole after the first page address. */ + ret = munmap(buffer->ptr + self->page_size, self->page_size); + ASSERT_EQ(ret, 0); + + /* We expect an error if the vma doesn't cover the range. */ + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3); + ASSERT_EQ(ret, -EINVAL); + + /* Page 2 will be a read-only zero page. */ + ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 2 * self->page_size); + val = *ptr + 3; + ASSERT_EQ(val, 3); + + /* Page 3 will be read-only. */ + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 3 * self->page_size); + *ptr = val; + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + + /* Page 4-5 will be read-write. */ + ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 4 * self->page_size); + *ptr = val; + ptr = (int *)(buffer->ptr + 5 * self->page_size); + *ptr = val; + + /* Now try to migrate pages 2-5 to device 1. */ + buffer->ptr = p + 2 * self->page_size; + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 4); + + /* Page 5 won't be migrated to device 0 because it's on device 1. */ + buffer->ptr = p + 5 * self->page_size; + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); + ASSERT_EQ(ret, -ENOENT); + buffer->ptr = p; + + buffer->ptr = p; + hmm_buffer_free(buffer); +} + +/* + * Migrate anonymous memory to device private memory and fault it back to system + * memory multiple times. + */ +TEST_F(hmm, migrate_multiple) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + unsigned long c; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + for (c = 0; c < NTIMES; c++) { + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, + npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Fault pages back to system memory and check them. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + hmm_buffer_free(buffer); + } +} + +/* + * Read anonymous memory multiple times. + */ +TEST_F(hmm, anon_read_multiple) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + unsigned long c; + int *ptr; + int ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + for (c = 0; c < NTIMES; c++) { + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i + c; + + /* Simulate a device reading system memory. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, + npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i + c); + + hmm_buffer_free(buffer); + } +} + +void *unmap_buffer(void *p) +{ + struct hmm_buffer *buffer = p; + + /* Delay for a bit and then unmap buffer while it is being read. */ + hmm_nanosleep(hmm_random() % 32000); + munmap(buffer->ptr + buffer->size / 2, buffer->size / 2); + buffer->ptr = NULL; + + return NULL; +} + +/* + * Try reading anonymous memory while it is being unmapped. + */ +TEST_F(hmm, anon_teardown) +{ + unsigned long npages; + unsigned long size; + unsigned long c; + void *ret; + + npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift; + ASSERT_NE(npages, 0); + size = npages << self->page_shift; + + for (c = 0; c < NTIMES; ++c) { + pthread_t thread; + struct hmm_buffer *buffer; + unsigned long i; + int *ptr; + int rc; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i + c; + + rc = pthread_create(&thread, NULL, unmap_buffer, buffer); + ASSERT_EQ(rc, 0); + + /* Simulate a device reading system memory. */ + rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, + npages); + if (rc == 0) { + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; + i < size / sizeof(*ptr); + ++i) + ASSERT_EQ(ptr[i], i + c); + } + + pthread_join(thread, &ret); + hmm_buffer_free(buffer); + } +} + +/* + * Test memory snapshot without faulting in pages accessed by the device. + */ +TEST_F(hmm2, snapshot) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + int *ptr; + unsigned char *p; + unsigned char *m; + int ret; + int val; + + npages = 7; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + p = buffer->ptr; + + /* Punch a hole after the first page address. */ + ret = munmap(buffer->ptr + self->page_size, self->page_size); + ASSERT_EQ(ret, 0); + + /* Page 2 will be read-only zero page. */ + ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 2 * self->page_size); + val = *ptr + 3; + ASSERT_EQ(val, 3); + + /* Page 3 will be read-only. */ + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 3 * self->page_size); + *ptr = val; + ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size, + PROT_READ); + ASSERT_EQ(ret, 0); + + /* Page 4-6 will be read-write. */ + ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size, + PROT_READ | PROT_WRITE); + ASSERT_EQ(ret, 0); + ptr = (int *)(buffer->ptr + 4 * self->page_size); + *ptr = val; + + /* Page 5 will be migrated to device 0. */ + buffer->ptr = p + 5 * self->page_size; + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 1); + + /* Page 6 will be migrated to device 1. */ + buffer->ptr = p + 6 * self->page_size; + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, 1); + + /* Simulate a device snapshotting CPU pagetables. */ + buffer->ptr = p; + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR); + ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR); + ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); + ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); + ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); + ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | + HMM_DMIRROR_PROT_WRITE); + ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); + + hmm_buffer_free(buffer); +} + +/* + * Test two devices reading the same memory (double mapped). + */ +TEST_F(hmm2, double_map) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + + npages = 6; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Make region read-only. */ + ret = mprotect(buffer->ptr, size, PROT_READ); + ASSERT_EQ(ret, 0); + + /* Simulate device 0 reading system memory. */ + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Simulate device 1 reading system memory. */ + ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + /* Punch a hole after the first page address. */ + ret = munmap(buffer->ptr + self->page_size, self->page_size); + ASSERT_EQ(ret, 0); + + hmm_buffer_free(buffer); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 665009ebfba4..a3f4f30f0a2e 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -59,7 +59,7 @@ else fi #filter 64bit architectures -ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64" +ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64" if [ -z $ARCH ]; then ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` fi @@ -307,4 +307,20 @@ else echo "[FAIL]" exitcode=1 fi + +echo "running HMM smoke test" +echo "------------------------------------" +./test_hmm.sh smoke +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi + exit $exitcode diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh new file mode 100755 index 000000000000..0647b525a625 --- /dev/null +++ b/tools/testing/selftests/vm/test_hmm.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> +# +# This is a test script for the kernel test driver to analyse vmalloc +# allocator. Therefore it is just a kernel module loader. You can specify +# and pass different parameters in order to: +# a) analyse performance of vmalloc allocations; +# b) stressing and stability check of vmalloc subsystem. + +TEST_NAME="test_hmm" +DRIVER="test_hmm" + +# 1 if fails +exitcode=1 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +check_test_requirements() +{ + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "$0: Must be run as root" + exit $ksft_skip + fi + + if ! which modprobe > /dev/null 2>&1; then + echo "$0: You need modprobe installed" + exit $ksft_skip + fi + + if ! modinfo $DRIVER > /dev/null 2>&1; then + echo "$0: You must have the following enabled in your kernel:" + echo "CONFIG_TEST_HMM=m" + exit $ksft_skip + fi +} + +load_driver() +{ + modprobe $DRIVER > /dev/null 2>&1 + if [ $? == 0 ]; then + major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) + mknod /dev/hmm_dmirror0 c $major 0 + mknod /dev/hmm_dmirror1 c $major 1 + fi +} + +unload_driver() +{ + modprobe -r $DRIVER > /dev/null 2>&1 + rm -f /dev/hmm_dmirror? +} + +run_smoke() +{ + echo "Running smoke test. Note, this test provides basic coverage." + + load_driver + $(dirname "${BASH_SOURCE[0]}")/hmm-tests + unload_driver +} + +usage() +{ + echo -n "Usage: $0" + echo + echo "Example usage:" + echo + echo "# Shows help message" + echo "./${TEST_NAME}.sh" + echo + echo "# Smoke testing" + echo "./${TEST_NAME}.sh smoke" + echo + exit 0 +} + +function run_test() +{ + if [ $# -eq 0 ]; then + usage + else + if [ "$1" = "smoke" ]; then + run_smoke + else + usage + fi + fi +} + +check_test_requirements +run_test $@ + +exit 0 diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/vm/write_to_hugetlbfs.c index 110bc4e4015d..6a2caba19ee1 100644 --- a/tools/testing/selftests/vm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/vm/write_to_hugetlbfs.c @@ -74,8 +74,6 @@ int main(int argc, char **argv) int write = 0; int reserve = 1; - unsigned long i; - if (signal(SIGINT, sig_handler) == SIG_ERR) err(1, "\ncan't catch SIGINT\n"); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 936e1ca9410e..17a1f53ceba0 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -48,8 +48,11 @@ cleanup() { exec 2>/dev/null printf "$orig_message_cost" > /proc/sys/net/core/message_cost ip0 link del dev wg0 + ip0 link del dev wg1 ip1 link del dev wg0 + ip1 link del dev wg1 ip2 link del dev wg0 + ip2 link del dev wg1 local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" [[ -n $to_kill ]] && kill $to_kill pp ip netns del $netns1 @@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2 key1="$(pp wg genkey)" key2="$(pp wg genkey)" key3="$(pp wg genkey)" +key4="$(pp wg genkey)" pub1="$(pp wg pubkey <<<"$key1")" pub2="$(pp wg pubkey <<<"$key2")" pub3="$(pp wg pubkey <<<"$key3")" +pub4="$(pp wg pubkey <<<"$key4")" psk="$(pp wg genpsk)" [[ -n $key1 && -n $key2 && -n $psk ]] configure_peers() { ip1 addr add 192.168.241.1/24 dev wg0 - ip1 addr add fd00::1/24 dev wg0 + ip1 addr add fd00::1/112 dev wg0 ip2 addr add 192.168.241.2/24 dev wg0 - ip2 addr add fd00::2/24 dev wg0 + ip2 addr add fd00::2/112 dev wg0 n1 wg set wg0 \ private-key <(echo "$key1") \ @@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2 n1 wg set wg0 private-key <(echo "$key3") n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove n1 ping -W 1 -c 1 192.168.241.2 +n2 wg set wg0 peer "$pub3" remove + +# Test that we can route wg through wg +ip1 addr flush dev wg0 +ip2 addr flush dev wg0 +ip1 addr add fd00::5:1/112 dev wg0 +ip2 addr add fd00::5:2/112 dev wg0 +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998 +ip1 link add wg1 type wireguard +ip2 link add wg1 type wireguard +ip1 addr add 192.168.241.1/24 dev wg1 +ip1 addr add fd00::1/112 dev wg1 +ip2 addr add 192.168.241.2/24 dev wg1 +ip2 addr add fd00::2/112 dev wg1 +ip1 link set mtu 1340 up dev wg1 +ip2 link set mtu 1340 up dev wg1 +n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5 +n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5 +tests +# Try to set up a routing loop between the two namespaces +ip1 link set netns $netns0 dev wg1 +ip0 addr add 192.168.241.1/24 dev wg1 +ip0 link set up dev wg1 +n0 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 +ip2 link del wg0 +ip2 link del wg1 +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel +ip0 link del wg1 ip1 link del wg0 -ip2 link del wg0 # Test using NAT. We now change the topology to this: # ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ @@ -282,6 +316,20 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0 +# Test that onion routing works, even when it loops +n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 +ip1 addr add 192.168.242.1/24 dev wg0 +ip2 link add wg1 type wireguard +ip2 addr add 192.168.242.2/24 dev wg1 +n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32 +ip2 link set wg1 up +n1 ping -W 1 -c 1 192.168.242.2 +ip2 link del wg1 +n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5 +! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel +n1 wg set wg0 peer "$pub3" remove +ip1 addr del 192.168.242.1/24 dev wg0 + # Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. ip1 -6 addr add fc00::9/96 dev vethc ip1 -6 route add default via fc00::1 diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 90598a425c18..4bdd6c1a19d3 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -44,7 +44,7 @@ endef $(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) -$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692)) $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config index 990c510a9cfa..f52f1e2bc7f6 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config @@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_WARN=1280 +CONFIG_THREAD_SHIFT=14 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 5909e7ef2a5c..b50c2085c1ac 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -25,7 +25,6 @@ CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y CONFIG_UBSAN_SANITIZE_ALL=y -CONFIG_UBSAN_NO_ALIGNMENT=y CONFIG_UBSAN_NULL=y CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 @@ -58,7 +57,6 @@ CONFIG_RCU_EQS_DEBUG=y CONFIG_USER_STACKTRACE_SUPPORT=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DOUBLEFAULT=y CONFIG_X86_DEBUG_FPU=y CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_DEBUG_PAGEALLOC=y diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index f33f32f1d208..b587b9a7a124 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -4,7 +4,7 @@ test: virtio_test vringh_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o -CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE +CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} @@ -22,7 +22,8 @@ OOT_CONFIGS=\ CONFIG_VHOST=m \ CONFIG_VHOST_NET=n \ CONFIG_VHOST_SCSI=n \ - CONFIG_VHOST_VSOCK=n + CONFIG_VHOST_VSOCK=n \ + CONFIG_VHOST_RING=n OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V} oot-build: echo "UNSUPPORTED! Don't use the resulting modules in production!" diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index d0351f83aebe..04d563fc9b95 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <stdlib.h> #if defined(__i386__) || defined(__x86_64__) #define barrier() asm volatile("" ::: "memory") #define virt_mb() __sync_synchronize() diff --git a/tools/virtio/generated/autoconf.h b/tools/virtio/generated/autoconf.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/virtio/generated/autoconf.h diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h index 903dc9c4bd11..2c51bccb97bb 100644 --- a/tools/virtio/linux/compiler.h +++ b/tools/virtio/linux/compiler.h @@ -7,4 +7,5 @@ #define READ_ONCE(var) (*((volatile typeof(var) *)(&(var)))) +#define __aligned(x) __attribute((__aligned__(x))) #endif diff --git a/tools/vm/Makefile b/tools/vm/Makefile index 20f6cf04377f..9860622cbb15 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm tools # +include ../scripts/Makefile.include + TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api |