diff options
Diffstat (limited to 'tools')
602 files changed, 28475 insertions, 6281 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 5ef1c15e88ad..11e86739456d 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t) "RECLAIM %12s%15s%15s\n" " %15llu%15llu%15llums\n" "THRASHING%12s%15s%15s\n" + " %15llu%15llu%15llums\n" + "COMPACT %12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->thrashing_count, (unsigned long long)t->thrashing_delay_total, - average_ms(t->thrashing_delay_total, t->thrashing_count)); + average_ms(t->thrashing_delay_total, t->thrashing_count), + "count", "delay total", "delay average", + (unsigned long long)t->compact_count, + (unsigned long long)t->compact_delay_total, + average_ms(t->compact_delay_total, t->compact_count)); } static void task_context_switch_counts(struct taskstats *t) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d5b5f2ab87a0..18de5f76f198 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -315,6 +315,7 @@ #define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. 
*/ +#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 01e2650b9585..3faf0f97edb1 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -486,6 +486,23 @@ #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f +/* AMD Collaborative Processor Performance Control MSRs */ +#define MSR_AMD_CPPC_CAP1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define MSR_AMD_CPPC_CAP2 0xc00102b2 +#define MSR_AMD_CPPC_REQ 0xc00102b3 +#define MSR_AMD_CPPC_STATUS 0xc00102b4 + +#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) +#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) +#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) +#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) + +#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) +#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) +#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) +#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index b2d504f11937..aff774775c67 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -35,11 +35,7 @@ # define NEED_CMOV 0 #endif -#ifdef CONFIG_X86_USE_3DNOW -# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31)) -#else # define NEED_3DNOW 0 -#endif #if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) # define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1cc9da6e29c7..59cf2343f3d9 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -39,7 +39,7 @@ 
SYM_FUNC_START_WEAK(memcpy) rep movsq movl %edx, %ecx rep movsb - ret + RET SYM_FUNC_END(memcpy) SYM_FUNC_END_ALIAS(__memcpy) EXPORT_SYMBOL(memcpy) @@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms) movq %rdi, %rax movq %rdx, %rcx rep movsb - ret + RET SYM_FUNC_END(memcpy_erms) SYM_FUNC_START_LOCAL(memcpy_orig) @@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq %r9, 1*8(%rdi) movq %r10, -2*8(%rdi, %rdx) movq %r11, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_16bytes: cmpl $8, %edx @@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movq -1*8(%rsi, %rdx), %r9 movq %r8, 0*8(%rdi) movq %r9, -1*8(%rdi, %rdx) - retq + RET .p2align 4 .Lless_8bytes: cmpl $4, %edx @@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movl -4(%rsi, %rdx), %r8d movl %ecx, (%rdi) movl %r8d, -4(%rdi, %rdx) - retq + RET .p2align 4 .Lless_3bytes: subl $1, %edx @@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig) movb %cl, (%rdi) .Lend: - retq + RET SYM_FUNC_END(memcpy_orig) .popsection diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 9827ae267f96..d624f2bc42f1 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -40,7 +40,7 @@ SYM_FUNC_START(__memset) movl %edx,%ecx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(__memset) SYM_FUNC_END_ALIAS(memset) EXPORT_SYMBOL(memset) @@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms) movq %rdx,%rcx rep stosb movq %r9,%rax - ret + RET SYM_FUNC_END(memset_erms) SYM_FUNC_START_LOCAL(memset_orig) @@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig) .Lende: movq %r10,%rax - ret + RET .Lbad_alignment: cmpq $7,%rdx diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 05ce4446b780..a736f64dc5dc 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) *.d /bootstrap/ /bpftool diff --git 
a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index c49487905ceb..ac8487dcff1d 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,6 +1,5 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f @@ -25,7 +24,7 @@ man: man8 man8: $(DOC_MAN8) RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null) -RST2MAN_OPTS += --verbose +RST2MAN_OPTS += --verbose --strip-comments list_pages = $(sort $(basename $(filter-out $(1),$(MAN8_RST)))) see_also = $(subst " ",, \ diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index 88b28aa7431f..342716f74ec4 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-btf ================ @@ -7,13 +9,14 @@ tool for inspection of BTF data :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **btf** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | - { **-B** | **--base-btf** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-B** | **--base-btf** } } *COMMANDS* := { **dump** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 3e4395eede4f..a17e9aa314fd 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-cgroup ================ @@ -7,13 +9,14 @@ tool for inspection and simple manipulation of eBPF progs :Manual section: 8 +.. 
include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **cgroup** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } @@ -30,9 +33,9 @@ CGROUP COMMANDS | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | -| **sock_release** } +| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -98,9 +101,9 @@ DESCRIPTION **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an unconnected udp6 socket (since 4.18); **recvmsg4** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp4 socket (since 5.2); + an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for - an unconnected udp6 socket (since 5.2); + an unconnected udp6 socket (since 5.2); **sysctl** sysctl access (since 5.2); **getsockopt** call to getsockopt (since 5.3); **setsockopt** call to setsockopt (since 5.3); diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index ab9f57ee4c3a..4ce9a77bc1e0 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -1,3 +1,5 @@ +.. 
SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + =============== bpftool-feature =============== @@ -7,12 +9,14 @@ tool for inspection of eBPF-related parameters for Linux kernel or net device :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **feature** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **probe** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 2ef2f2df0279..bc276388f432 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-gen ================ @@ -7,13 +9,14 @@ tool for BPF code-generation :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **gen** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-L** | **--use-loader** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } } *COMMAND* := { **object** | **skeleton** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 471f363a725a..84839d488621 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ============ bpftool-iter ============ @@ -7,12 +9,14 @@ tool to create BPF iterators :Manual section: 8 +.. 
include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **iter** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 0de90f086238..52a4eee4af54 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-link ================ @@ -7,13 +9,14 @@ tool for inspection and simple manipulation of eBPF links :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **link** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index d0c4abe08aba..7c188a598444 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-map ================ @@ -7,17 +9,18 @@ tool for inspection and simple manipulation of eBPF maps :Manual section: 8 +.. 
include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } | { **-n** | **--nomount** } } + *OPTIONS* := { |COMMON_OPTIONS| | { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** - | **delete** | **pin** | **help** } + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | + **delete** | **pin** | **help** } MAP COMMANDS ============= @@ -52,7 +55,7 @@ MAP COMMANDS | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage** - | **task_storage** } +| | **task_storage** | **bloom_filter** } DESCRIPTION =========== diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index 1ae0375e8fea..f4e0a516335a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-net ================ @@ -7,12 +9,14 @@ tool for inspection of netdev/tc related bpf prog attachments :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **net** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } @@ -31,44 +35,44 @@ NET COMMANDS DESCRIPTION =========== **bpftool net { show | list }** [ **dev** *NAME* ] - List bpf program attachments in the kernel networking subsystem. 
- - Currently, only device driver xdp attachments and tc filter - classification/action attachments are implemented, i.e., for - program types **BPF_PROG_TYPE_SCHED_CLS**, - **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. - For programs attached to a particular cgroup, e.g., - **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, - **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, - users can use **bpftool cgroup** to dump cgroup attachments. - For sk_{filter, skb, msg, reuseport} and lwt/seg6 - bpf programs, users should consult other tools, e.g., iproute2. - - The current output will start with all xdp program attachments, followed by - all tc class/qdisc bpf program attachments. Both xdp programs and - tc programs are ordered based on ifindex number. If multiple bpf - programs attached to the same networking device through **tc filter**, - the order will be first all bpf programs attached to tc classes, then - all bpf programs attached to non clsact qdiscs, and finally all - bpf programs attached to root and clsact qdisc. + List bpf program attachments in the kernel networking subsystem. + + Currently, only device driver xdp attachments and tc filter + classification/action attachments are implemented, i.e., for + program types **BPF_PROG_TYPE_SCHED_CLS**, + **BPF_PROG_TYPE_SCHED_ACT** and **BPF_PROG_TYPE_XDP**. + For programs attached to a particular cgroup, e.g., + **BPF_PROG_TYPE_CGROUP_SKB**, **BPF_PROG_TYPE_CGROUP_SOCK**, + **BPF_PROG_TYPE_SOCK_OPS** and **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + users can use **bpftool cgroup** to dump cgroup attachments. + For sk_{filter, skb, msg, reuseport} and lwt/seg6 + bpf programs, users should consult other tools, e.g., iproute2. + + The current output will start with all xdp program attachments, followed by + all tc class/qdisc bpf program attachments. Both xdp programs and + tc programs are ordered based on ifindex number. 
If multiple bpf + programs attached to the same networking device through **tc filter**, + the order will be first all bpf programs attached to tc classes, then + all bpf programs attached to non clsact qdiscs, and finally all + bpf programs attached to root and clsact qdisc. **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ] - Attach bpf program *PROG* to network interface *NAME* with - type specified by *ATTACH_TYPE*. Previously attached bpf program - can be replaced by the command used with **overwrite** option. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. + Attach bpf program *PROG* to network interface *NAME* with + type specified by *ATTACH_TYPE*. Previously attached bpf program + can be replaced by the command used with **overwrite** option. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. - *ATTACH_TYPE* can be of: - **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; - **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; - **xdpdrv** - Native XDP. runs earliest point in driver's receive path; - **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; + *ATTACH_TYPE* can be of: + **xdp** - try native XDP and fallback to generic XDP if NIC driver does not support it; + **xdpgeneric** - Generic XDP. runs at generic XDP hook when packet already enters receive path as skb; + **xdpdrv** - Native XDP. runs earliest point in driver's receive path; + **xdpoffload** - Offload XDP. runs directly on NIC on each packet reception; **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME* - Detach bpf program attached to network interface *NAME* with - type specified by *ATTACH_TYPE*. To detach bpf program, same - *ATTACH_TYPE* previously used for attach must be specified. - Currently, only XDP-related modes are supported for *ATTACH_TYPE*. 
+ Detach bpf program attached to network interface *NAME* with + type specified by *ATTACH_TYPE*. To detach bpf program, same + *ATTACH_TYPE* previously used for attach must be specified. + Currently, only XDP-related modes are supported for *ATTACH_TYPE*. **bpftool net help** Print short help message. diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index ce52798a917d..5fea633a82f1 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-perf ================ @@ -7,12 +9,14 @@ tool for inspection of perf related bpf prog attachments :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **perf** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **show** | **list** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 91608cb7e44a..a2e9359e554c 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ bpftool-prog ================ @@ -7,18 +9,20 @@ tool for inspection and simple manipulation of eBPF progs :Manual section: 8 +.. 
include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | - { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | - { **-L** | **--use-loader** } } + *OPTIONS* := { |COMMON_OPTIONS| | + { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | + { **-L** | **--use-loader** } } *COMMANDS* := - { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** - | **loadall** | **help** } + { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | + **loadall** | **help** } PROG COMMANDS ============= diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index 02afc0fc14cb..ee53a122c0c7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================== bpftool-struct_ops ================== @@ -7,12 +9,14 @@ tool to register/unregister/introspect BPF struct_ops :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { |COMMON_OPTIONS| } *COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index bb23f55bb05a..7084dd9fa2f8 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -1,3 +1,5 @@ +.. 
SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + ================ BPFTOOL ================ @@ -7,6 +9,8 @@ tool for inspection and simple manipulation of eBPF programs and maps :Manual section: 8 +.. include:: substitutions.rst + SYNOPSIS ======== @@ -18,15 +22,14 @@ SYNOPSIS *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } - *OPTIONS* := { { **-V** | **--version** } | - { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *OPTIONS* := { { **-V** | **--version** } | |COMMON_OPTIONS| } *MAP-COMMANDS* := { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | - **delete** | **pin** | **event_pipe** | **help** } + **delete** | **pin** | **event_pipe** | **help** } *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | - **load** | **attach** | **detach** | **help** } + **load** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 05d06c74dcaa..908487b9c2ad 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + -h, --help Print short help message (similar to **bpftool help**). @@ -20,3 +22,12 @@ Print all logs available, even debug-level information. This includes logs from libbpf as well as from the verifier, when attempting to load programs. + +-l, --legacy + Use legacy libbpf mode which has more relaxed BPF program + requirements. By default, bpftool has more strict requirements + about section names, changes pinning logic and doesn't support + some of the older non-BTF map declarations. + + See https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0 + for details. 
diff --git a/tools/bpf/bpftool/Documentation/substitutions.rst b/tools/bpf/bpftool/Documentation/substitutions.rst new file mode 100644 index 000000000000..ccf1ffa0686c --- /dev/null +++ b/tools/bpf/bpftool/Documentation/substitutions.rst @@ -0,0 +1,3 @@ +.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +.. |COMMON_OPTIONS| replace:: { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | { **-l** | **--legacy** } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 7cfba11c3014..83369f55df61 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,6 +1,5 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) include ../../scripts/Makefile.include -include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) @@ -58,7 +57,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_ $(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT) $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \ DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR) prefix= \ - ARCH= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) $@ install_headers $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR) $(call QUIET_INSTALL, $@) @@ -153,6 +152,9 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif +HOST_CFLAGS = $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),\ + $(subst $(CLANG_CROSS_FLAGS),,$(CFLAGS))) + BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) @@ -187,7 +189,8 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP) -I$(if $(OUTPUT),$(OUTPUT),.) 
\ -I$(srctree)/tools/include/uapi/ \ -I$(LIBBPF_BOOTSTRAP_INCLUDE) \ - -g -O2 -Wall -target bpf -c $< -o $@ && $(LLVM_STRIP) -g $@ + -g -O2 -Wall -target bpf -c $< -o $@ + $(Q)$(LLVM_STRIP) -g $@ $(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ @@ -202,10 +205,10 @@ endif CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< + $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ $(OUTPUT)feature.o: ifneq ($(feature-zlib), 1) @@ -213,19 +216,16 @@ ifneq ($(feature-zlib), 1) endif $(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF_BOOTSTRAP) - $(QUIET_LINK)$(HOSTCC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) \ - $(LIBS_BOOTSTRAP) + $(QUIET_LINK)$(HOSTCC) $(HOST_CFLAGS) $(LDFLAGS) $(BOOTSTRAP_OBJS) $(LIBS_BOOTSTRAP) -o $@ $(OUTPUT)bpftool: $(OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ $(BOOTSTRAP_OUTPUT)%.o: %.c $(LIBBPF_BOOTSTRAP_INTERNAL_HDRS) | $(BOOTSTRAP_OUTPUT) - $(QUIET_CC)$(HOSTCC) \ - $(subst -I$(LIBBPF_INCLUDE),-I$(LIBBPF_BOOTSTRAP_INCLUDE),$(CFLAGS)) \ - -c -MMD -o $@ $< + $(QUIET_CC)$(HOSTCC) $(HOST_CFLAGS) -c -MMD $< -o $@ $(OUTPUT)%.o: %.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< + $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD $< -o $@ feature-detect-clean: $(call QUIET_CLEAN, feature-detect) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 88e2bcf16cca..493753a4962e 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -261,7 +261,7 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then local c='--version --json --pretty 
--bpffs --mapcompat --debug \ - --use-loader --base-btf' + --use-loader --base-btf --legacy' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -710,7 +710,8 @@ _bpftool() hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops inode_storage task_storage ringbuf' + struct_ops ringbuf inode_storage task_storage \ + bloom_filter' COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 015d2758f826..59833125ac0a 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -39,6 +39,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", + [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; struct btf_attach_point { @@ -142,6 +143,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: if (json_output) jsonw_uint_field(w, "type_id", t->type); else @@ -418,9 +420,10 @@ static int dump_btf_c(const struct btf *btf, struct btf_dump *d; int err = 0, i; - d = btf_dump__new(btf, NULL, NULL, btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); + d = btf_dump__new(btf, btf_dump_printf, NULL, NULL); + err = libbpf_get_error(d); + if (err) + return err; printf("#ifndef __VMLINUX_H__\n"); printf("#define __VMLINUX_H__\n"); @@ -547,8 +550,8 @@ static int do_dump(int argc, char **argv) } btf = btf__parse_split(*argv, base ?: base_btf); - if (IS_ERR(btf)) { - err = -PTR_ERR(btf); + err = libbpf_get_error(btf); + if (err) { btf = NULL; p_err("failed to load BTF from %s: %s", *argv, strerror(err)); diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 9c25286a5c73..f5dddf8ef404 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ 
b/tools/bpf/bpftool/btf_dumper.c @@ -32,14 +32,16 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, const struct btf_type *func_proto, __u32 prog_id) { - struct bpf_prog_info_linear *prog_info = NULL; const struct btf_type *func_type; + int prog_fd = -1, func_sig_len; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const char *prog_name = NULL; - struct bpf_func_info *finfo; struct btf *prog_btf = NULL; - struct bpf_prog_info *info; - int prog_fd, func_sig_len; + struct bpf_func_info finfo; + __u32 finfo_rec_size; char prog_str[1024]; + int err; /* Get the ptr's func_proto */ func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0, @@ -52,25 +54,30 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, /* Get the bpf_prog's name. Obtain from func_info. */ prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (prog_fd == -1) + if (prog_fd < 0) goto print; - prog_info = bpf_program__get_prog_info_linear(prog_fd, - 1UL << BPF_PROG_INFO_FUNC_INFO); - close(prog_fd); - if (IS_ERR(prog_info)) { - prog_info = NULL; + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) goto print; - } - info = &prog_info->info; - if (!info->btf_id || !info->nr_func_info) + if (!info.btf_id || !info.nr_func_info) + goto print; + + finfo_rec_size = info.func_info_rec_size; + memset(&info, 0, sizeof(info)); + info.nr_func_info = 1; + info.func_info_rec_size = finfo_rec_size; + info.func_info = ptr_to_u64(&finfo); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err) goto print; - prog_btf = btf__load_from_kernel_by_id(info->btf_id); + + prog_btf = btf__load_from_kernel_by_id(info.btf_id); if (libbpf_get_error(prog_btf)) goto print; - finfo = u64_to_ptr(info->func_info); - func_type = btf__type_by_id(prog_btf, finfo->type_id); + func_type = btf__type_by_id(prog_btf, finfo.type_id); if (!func_type || !btf_is_func(func_type)) goto print; @@ -92,7 +99,8 @@ print: prog_str[sizeof(prog_str) - 1] = '\0'; 
jsonw_string(d->jw, prog_str); btf__free(prog_btf); - free(prog_info); + if (prog_fd >= 0) + close(prog_fd); return 0; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 511eccdbdfe6..fa8eb8134344 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -74,6 +74,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_XDP] = "xdp", [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", + [BPF_PERF_EVENT] = "perf_event", }; void p_err(const char *fmt, ...) diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index ade44577688e..e999159fa28d 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -467,7 +467,7 @@ static bool probe_bpf_syscall(const char *define_prefix) { bool res; - bpf_load_program(BPF_PROG_TYPE_UNSPEC, NULL, 0, NULL, 0, NULL, 0); + bpf_prog_load(BPF_PROG_TYPE_UNSPEC, NULL, NULL, NULL, 0, NULL); res = (errno != ENOSYS); print_bool_feature("have_bpf_syscall", @@ -643,15 +643,111 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type, } static void -probe_large_insn_limit(const char *define_prefix, __u32 ifindex) +probe_misc_feature(struct bpf_insn *insns, size_t len, + const char *define_prefix, __u32 ifindex, + const char *feat_name, const char *plain_name, + const char *define_name) { + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .prog_ifindex = ifindex, + ); bool res; + int fd; - res = bpf_probe_large_insn_limit(ifindex); - print_bool_feature("have_large_insn_limit", + errno = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", + insns, len, &opts); + res = fd >= 0 || !errno; + + if (fd >= 0) + close(fd); + + print_bool_feature(feat_name, plain_name, define_name, res, + define_prefix); +} + +/* + * Probe for availability of kernel commit (5.3): + * + * c04c0d2b968a ("bpf: increase complexity limit and maximum program size") + */ +static 
void probe_large_insn_limit(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[BPF_MAXINSNS + 1]; + int i; + + for (i = 0; i < BPF_MAXINSNS; i++) + insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1); + insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_large_insn_limit", "Large program size limit", - "LARGE_INSN_LIMIT", - res, define_prefix); + "LARGE_INSN_LIMIT"); +} + +/* + * Probe for bounded loop support introduced in commit 2589726d12a1 + * ("bpf: introduce bounded loops"). + */ +static void +probe_bounded_loops(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 10), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_bounded_loops", + "Bounded loop support", + "BOUNDED_LOOPS"); +} + +/* + * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b + * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions"). + */ +static void +probe_v2_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v2_isa_extension", + "ISA extension v2", + "V2_ISA_EXTENSION"); +} + +/* + * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6 + * ("bpf: verifier support JMP32"). 
+ */ +static void +probe_v3_isa_extension(const char *define_prefix, __u32 ifindex) +{ + struct bpf_insn insns[4] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN() + }; + + probe_misc_feature(insns, ARRAY_SIZE(insns), + define_prefix, ifindex, + "have_v3_isa_extension", + "ISA extension v3", + "V3_ISA_EXTENSION"); } static void @@ -768,6 +864,9 @@ static void section_misc(const char *define_prefix, __u32 ifindex) "/*** eBPF misc features ***/", define_prefix); probe_large_insn_limit(define_prefix, ifindex); + probe_bounded_loops(define_prefix, ifindex); + probe_v2_isa_extension(define_prefix, ifindex); + probe_v3_isa_extension(define_prefix, ifindex); print_end_section(); } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 5c18351290f0..b4695df2ea3d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -218,9 +218,10 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) char sec_ident[256], map_ident[256]; int i, err = 0; - d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); + d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL); + err = libbpf_get_error(d); + if (err) + return err; bpf_object__for_each_map(map, obj) { /* only generate definitions for memory-mapped internal maps */ @@ -485,7 +486,6 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard) { - struct bpf_object_load_attr load_attr = {}; DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); struct bpf_map *map; char ident[256]; @@ -495,12 +495,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h if (err) return err; - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - err = 
bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto out; @@ -718,11 +713,15 @@ static int do_skeleton(int argc, char **argv) if (obj_name[0] == '\0') get_obj_name(obj_name, file); opts.object_name = obj_name; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + opts.kernel_log_level = 1 + 2 + 4; obj = bpf_object__open_mem(obj_data, file_sz, &opts); - if (IS_ERR(obj)) { + err = libbpf_get_error(obj); + if (err) { char err_buf[256]; - libbpf_strerror(PTR_ERR(obj), err_buf, sizeof(err_buf)); + libbpf_strerror(err, err_buf, sizeof(err_buf)); p_err("failed to open BPF object file: %s", err_buf); obj = NULL; goto out; diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 6c0de647b8ad..f88fdc820d23 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -46,7 +46,8 @@ static int do_pin(int argc, char **argv) } obj = bpf_object__open(objfile); - if (IS_ERR(obj)) { + err = libbpf_get_error(obj); + if (err) { p_err("can't open objfile %s", objfile); goto close_map_fd; } @@ -64,8 +65,8 @@ static int do_pin(int argc, char **argv) } link = bpf_program__attach_iter(prog, &iter_opts); - if (IS_ERR(link)) { - err = PTR_ERR(link); + err = libbpf_get_error(link); + if (err) { p_err("attach_iter failed for program %s", bpf_program__name(prog)); goto close_obj; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 28237d7cef67..020e91a542d5 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,6 +31,7 @@ bool block_mount; bool verifier_logs; bool relaxed_maps; bool use_loader; +bool legacy_libbpf; struct btf *base_btf; struct hashmap *refs_table; @@ -92,6 +93,7 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ jsonw_bool_field(json_wtr, "libbfd", has_libbfd); + jsonw_bool_field(json_wtr, "libbpf_strict", !legacy_libbpf); 
jsonw_bool_field(json_wtr, "skeletons", has_skeletons); jsonw_end_object(json_wtr); /* features */ @@ -105,6 +107,10 @@ static int do_version(int argc, char **argv) printf(" libbfd"); nb_features++; } + if (!legacy_libbpf) { + printf("%s libbpf_strict", nb_features++ ? "," : ""); + nb_features++; + } if (has_skeletons) printf("%s skeletons", nb_features++ ? "," : ""); printf("\n"); @@ -396,10 +402,14 @@ int main(int argc, char **argv) { "debug", no_argument, NULL, 'd' }, { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, + { "legacy", no_argument, NULL, 'l' }, { 0 } }; + bool version_requested = false; int opt, ret; + setlinebuf(stdout); + last_do_help = do_help; pretty_output = false; json_output = false; @@ -408,11 +418,12 @@ int main(int argc, char **argv) bin_name = argv[0]; opterr = 0; - while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", + while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l", options, NULL)) >= 0) { switch (opt) { case 'V': - return do_version(argc, argv); + version_requested = true; + break; case 'h': return do_help(argc, argv); case 'p': @@ -454,6 +465,9 @@ int main(int argc, char **argv) case 'L': use_loader = true; break; + case 'l': + legacy_libbpf = true; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) @@ -463,11 +477,20 @@ int main(int argc, char **argv) } } + if (!legacy_libbpf) { + ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (ret) + p_err("failed to enable libbpf strict mode: %d", ret); + } + argc -= optind; argv += optind; if (argc < 0) usage(); + if (version_requested) + return do_version(argc, argv); + ret = cmd_select(cmds, argc, argv, do_help); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 383835c2604d..8d76d937a62b 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,7 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned 
FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ @@ -90,6 +90,7 @@ extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern bool use_loader; +extern bool legacy_libbpf; extern struct btf *base_btf; extern struct hashmap *refs_table; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cae1f1119296..cc530a229812 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -53,6 +53,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_RINGBUF] = "ringbuf", [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage", [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage", + [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", }; const size_t map_type_name_size = ARRAY_SIZE(map_type_name); @@ -811,7 +812,7 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(btf_vmlinux)) + if (libbpf_get_error(btf_vmlinux)) p_err("failed to get kernel btf"); } return btf_vmlinux; @@ -831,13 +832,13 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) static void free_map_kv_btf(struct btf *btf) { - if (!IS_ERR(btf) && btf != btf_vmlinux) + if (!libbpf_get_error(btf) && btf != btf_vmlinux) btf__free(btf); } static void free_btf_vmlinux(void) { - if (!IS_ERR(btf_vmlinux)) + if (!libbpf_get_error(btf_vmlinux)) btf__free(btf_vmlinux); } @@ -862,8 +863,8 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, if (wtr) { btf = get_map_kv_btf(info); - if (IS_ERR(btf)) { - err = PTR_ERR(btf); + err = libbpf_get_error(btf); + if (err) { goto exit_free; } @@ -1260,7 +1261,10 @@ static int do_pin(int argc, char **argv) static int do_create(int argc, char **argv) 
{ - struct bpf_create_map_attr attr = { NULL, }; + LIBBPF_OPTS(bpf_map_create_opts, attr); + enum bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC; + __u32 key_size = 0, value_size = 0, max_entries = 0; + const char *map_name = NULL; const char *pinfile; int err = -1, fd; @@ -1275,30 +1279,30 @@ static int do_create(int argc, char **argv) if (is_prefix(*argv, "type")) { NEXT_ARG(); - if (attr.map_type) { + if (map_type) { p_err("map type already specified"); goto exit; } - attr.map_type = map_type_from_str(*argv); - if ((int)attr.map_type < 0) { + map_type = map_type_from_str(*argv); + if ((int)map_type < 0) { p_err("unrecognized map type: %s", *argv); goto exit; } NEXT_ARG(); } else if (is_prefix(*argv, "name")) { NEXT_ARG(); - attr.name = GET_ARG(); + map_name = GET_ARG(); } else if (is_prefix(*argv, "key")) { - if (parse_u32_arg(&argc, &argv, &attr.key_size, + if (parse_u32_arg(&argc, &argv, &key_size, "key size")) goto exit; } else if (is_prefix(*argv, "value")) { - if (parse_u32_arg(&argc, &argv, &attr.value_size, + if (parse_u32_arg(&argc, &argv, &value_size, "value size")) goto exit; } else if (is_prefix(*argv, "entries")) { - if (parse_u32_arg(&argc, &argv, &attr.max_entries, + if (parse_u32_arg(&argc, &argv, &max_entries, "max entries")) goto exit; } else if (is_prefix(*argv, "flags")) { @@ -1339,14 +1343,14 @@ static int do_create(int argc, char **argv) } } - if (!attr.name) { + if (!map_name) { p_err("map name not specified"); goto exit; } set_max_rlimit(); - fd = bpf_create_map_xattr(&attr); + fd = bpf_map_create(map_type, map_name, key_size, value_size, max_entries, &attr); if (fd < 0) { p_err("map create failed: %s", strerror(errno)); goto exit; @@ -1477,7 +1481,7 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage }\n" + " task_storage | 
bloom_filter }\n" " " HELP_SPEC_OPTIONS " |\n" " {-f|--bpffs} | {-n|--nomount} }\n" "", diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index b98ea702d284..6b0c410152de 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -124,7 +124,7 @@ int do_event_pipe(int argc, char **argv) .wakeup_events = 1, }; struct bpf_map_info map_info = {}; - struct perf_buffer_raw_opts opts = {}; + LIBBPF_OPTS(perf_buffer_raw_opts, opts); struct event_pipe_ctx ctx = { .all_cpus = true, .cpu = -1, @@ -190,14 +190,11 @@ int do_event_pipe(int argc, char **argv) ctx.idx = 0; } - opts.attr = &perf_attr; - opts.event_cb = print_bpf_output; - opts.ctx = &ctx; opts.cpu_cnt = ctx.all_cpus ? 0 : 1; opts.cpus = &ctx.cpu; opts.map_keys = &ctx.idx; - - pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &opts); + pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr, + print_bpf_output, &ctx, &opts); err = libbpf_get_error(pb); if (err) { p_err("failed to create perf buffer: %s (%d)", diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 515d22952602..2a21d50516bc 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -100,6 +100,76 @@ static enum bpf_attach_type parse_attach_type(const char *str) return __MAX_BPF_ATTACH_TYPE; } +static int prep_prog_info(struct bpf_prog_info *const info, enum dump_mode mode, + void **info_data, size_t *const info_data_sz) +{ + struct bpf_prog_info holder = {}; + size_t needed = 0; + void *ptr; + + if (mode == DUMP_JITED) { + holder.jited_prog_len = info->jited_prog_len; + needed += info->jited_prog_len; + } else { + holder.xlated_prog_len = info->xlated_prog_len; + needed += info->xlated_prog_len; + } + + holder.nr_jited_ksyms = info->nr_jited_ksyms; + needed += info->nr_jited_ksyms * sizeof(__u64); + + holder.nr_jited_func_lens = info->nr_jited_func_lens; + needed += info->nr_jited_func_lens * sizeof(__u32); + + holder.nr_func_info = info->nr_func_info; + 
holder.func_info_rec_size = info->func_info_rec_size; + needed += info->nr_func_info * info->func_info_rec_size; + + holder.nr_line_info = info->nr_line_info; + holder.line_info_rec_size = info->line_info_rec_size; + needed += info->nr_line_info * info->line_info_rec_size; + + holder.nr_jited_line_info = info->nr_jited_line_info; + holder.jited_line_info_rec_size = info->jited_line_info_rec_size; + needed += info->nr_jited_line_info * info->jited_line_info_rec_size; + + if (needed > *info_data_sz) { + ptr = realloc(*info_data, needed); + if (!ptr) + return -1; + + *info_data = ptr; + *info_data_sz = needed; + } + ptr = *info_data; + + if (mode == DUMP_JITED) { + holder.jited_prog_insns = ptr_to_u64(ptr); + ptr += holder.jited_prog_len; + } else { + holder.xlated_prog_insns = ptr_to_u64(ptr); + ptr += holder.xlated_prog_len; + } + + holder.jited_ksyms = ptr_to_u64(ptr); + ptr += holder.nr_jited_ksyms * sizeof(__u64); + + holder.jited_func_lens = ptr_to_u64(ptr); + ptr += holder.nr_jited_func_lens * sizeof(__u32); + + holder.func_info = ptr_to_u64(ptr); + ptr += holder.nr_func_info * holder.func_info_rec_size; + + holder.line_info = ptr_to_u64(ptr); + ptr += holder.nr_line_info * holder.line_info_rec_size; + + holder.jited_line_info = ptr_to_u64(ptr); + ptr += holder.nr_jited_line_info * holder.jited_line_info_rec_size; + + *info = holder; + return 0; +} + static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) { struct timespec real_time_ts, boot_time_ts; @@ -639,8 +709,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, char func_sig[1024]; unsigned char *buf; __u32 member_len; + int fd, err = -1; ssize_t n; - int fd; if (mode == DUMP_JITED) { if (info->jited_prog_len == 0 || !info->jited_prog_insns) { @@ -679,7 +749,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (fd < 0) { p_err("can't open file %s: %s", filepath, strerror(errno)); - return -1; + goto exit_free; } n = write(fd, buf, member_len); @@ -687,7 +757,7 @@ 
prog_dump(struct bpf_prog_info *info, enum dump_mode mode, if (n != (ssize_t)member_len) { p_err("error writing output file: %s", n < 0 ? strerror(errno) : "short write"); - return -1; + goto exit_free; } if (json_output) @@ -701,7 +771,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, info->netns_ino, &disasm_opt); if (!name) - return -1; + goto exit_free; } if (info->nr_jited_func_lens && info->jited_func_lens) { @@ -796,23 +866,28 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, kernel_syms_destroy(&dd); } - btf__free(btf); + err = 0; - return 0; +exit_free: + btf__free(btf); + bpf_prog_linfo__free(prog_linfo); + return err; } static int do_dump(int argc, char **argv) { - struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info info; + __u32 info_len = sizeof(info); + size_t info_data_sz = 0; + void *info_data = NULL; char *filepath = NULL; bool opcodes = false; bool visual = false; enum dump_mode mode; bool linum = false; - int *fds = NULL; int nb_fds, i = 0; + int *fds = NULL; int err = -1; - __u64 arrays; if (is_prefix(*argv, "jited")) { if (disasm_init()) @@ -872,43 +947,44 @@ static int do_dump(int argc, char **argv) goto exit_close; } - if (mode == DUMP_JITED) - arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; - else - arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; - - arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - if (json_output && nb_fds > 1) jsonw_start_array(json_wtr); /* root array */ for (i = 0; i < nb_fds; i++) { - info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); - if (IS_ERR_OR_NULL(info_linear)) { + memset(&info, 0, sizeof(info)); + + err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); + if (err) { + p_err("can't get prog info: %s", strerror(errno)); + break; + } + + err = prep_prog_info(&info, mode, &info_data, 
&info_data_sz); + if (err) { + p_err("can't grow prog info_data"); + break; + } + + err = bpf_obj_get_info_by_fd(fds[i], &info, &info_len); + if (err) { p_err("can't get prog info: %s", strerror(errno)); break; } if (json_output && nb_fds > 1) { jsonw_start_object(json_wtr); /* prog object */ - print_prog_header_json(&info_linear->info); + print_prog_header_json(&info); jsonw_name(json_wtr, "insns"); } else if (nb_fds > 1) { - print_prog_header_plain(&info_linear->info); + print_prog_header_plain(&info); } - err = prog_dump(&info_linear->info, mode, filepath, opcodes, - visual, linum); + err = prog_dump(&info, mode, filepath, opcodes, visual, linum); if (json_output && nb_fds > 1) jsonw_end_object(json_wtr); /* prog object */ else if (i != nb_fds - 1 && nb_fds > 1) printf("\n"); - free(info_linear); if (err) break; close(fds[i]); @@ -920,6 +996,7 @@ exit_close: for (; i < nb_fds; i++) close(fds[i]); exit_free: + free(info_data); free(fds); return err; } @@ -1387,7 +1464,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, .relaxed_maps = relaxed_maps, ); - struct bpf_object_load_attr load_attr = { 0 }; enum bpf_attach_type expected_attach_type; struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; @@ -1409,8 +1485,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) while (argc) { if (is_prefix(*argv, "type")) { - char *type; - NEXT_ARG(); if (common_prog_type != BPF_PROG_TYPE_UNSPEC) { @@ -1420,21 +1494,26 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) if (!REQ_ARGS(1)) goto err_free_reuse_maps; - /* Put a '/' at the end of type to appease libbpf */ - type = malloc(strlen(*argv) + 2); - if (!type) { - p_err("mem alloc failed"); - goto err_free_reuse_maps; - } - *type = 0; - strcat(type, *argv); - strcat(type, "/"); + err = libbpf_prog_type_by_name(*argv, &common_prog_type, + &expected_attach_type); + if 
(err < 0) { + /* Put a '/' at the end of type to appease libbpf */ + char *type = malloc(strlen(*argv) + 2); - err = get_prog_type_by_name(type, &common_prog_type, - &expected_attach_type); - free(type); - if (err < 0) - goto err_free_reuse_maps; + if (!type) { + p_err("mem alloc failed"); + goto err_free_reuse_maps; + } + *type = 0; + strcat(type, *argv); + strcat(type, "/"); + + err = get_prog_type_by_name(type, &common_prog_type, + &expected_attach_type); + free(type); + if (err < 0) + goto err_free_reuse_maps; + } NEXT_ARG(); } else if (is_prefix(*argv, "map")) { @@ -1518,6 +1597,10 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) set_max_rlimit(); + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + open_opts.kernel_log_level = 1 + 2 + 4; + obj = bpf_object__open_file(file, &open_opts); if (libbpf_get_error(obj)) { p_err("failed to open object file"); @@ -1572,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) j = 0; idx = 0; bpf_object__for_each_map(map, obj) { - if (!bpf_map__is_offload_neutral(map)) + if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) bpf_map__set_ifindex(map, ifindex); if (j < old_map_fds && idx == map_replace[j].idx) { @@ -1597,12 +1680,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) goto err_close_obj; } - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto err_close_obj; @@ -1657,6 +1735,11 @@ err_unpin: else bpf_object__unpin_programs(obj, pinfile); err_close_obj: + if (!legacy_libbpf) { + p_info("Warning: bpftool is now running in libbpf strict mode and has more stringent requirements about BPF programs.\n" + "If it used to work for this object file but now doesn't, see --legacy option for 
more details.\n"); + } + bpf_object__close(obj); err_free_reuse_maps: for (i = 0; i < old_map_fds; i++) @@ -1689,17 +1772,19 @@ static int try_loader(struct gen_loader_opts *gen) sizeof(struct bpf_prog_desc)); int log_buf_sz = (1u << 24) - 1; int err, fds_before, fd_delta; - char *log_buf; + char *log_buf = NULL; ctx = alloca(ctx_sz); memset(ctx, 0, ctx_sz); ctx->sz = ctx_sz; - ctx->log_level = 1; - ctx->log_size = log_buf_sz; - log_buf = malloc(log_buf_sz); - if (!log_buf) - return -ENOMEM; - ctx->log_buf = (long) log_buf; + if (verifier_logs) { + ctx->log_level = 1 + 2 + 4; + ctx->log_size = log_buf_sz; + log_buf = malloc(log_buf_sz); + if (!log_buf) + return -ENOMEM; + ctx->log_buf = (long) log_buf; + } opts.ctx = ctx; opts.data = gen->data; opts.data_sz = gen->data_sz; @@ -1708,9 +1793,9 @@ static int try_loader(struct gen_loader_opts *gen) fds_before = count_open_fds(); err = bpf_load_and_run(&opts); fd_delta = count_open_fds() - fds_before; - if (err < 0) { + if (err < 0 || verifier_logs) { fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf); - if (fd_delta) + if (fd_delta && err < 0) fprintf(stderr, "loader prog leaked %d FDs\n", fd_delta); } @@ -1722,7 +1807,6 @@ static int do_loader(int argc, char **argv) { DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); DECLARE_LIBBPF_OPTS(gen_loader_opts, gen); - struct bpf_object_load_attr load_attr = {}; struct bpf_object *obj; const char *file; int err = 0; @@ -1731,6 +1815,10 @@ static int do_loader(int argc, char **argv) return -1; file = GET_ARG(); + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + open_opts.kernel_log_level = 1 + 2 + 4; + obj = bpf_object__open_file(file, &open_opts); if (libbpf_get_error(obj)) { p_err("failed to open object file"); @@ -1741,12 +1829,7 @@ static int do_loader(int argc, char **argv) if (err) goto err_close_obj; - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - 
load_attr.log_level = 1 + 2 + 4; - - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { p_err("failed to load object file"); goto err_close_obj; @@ -2016,41 +2099,58 @@ static void profile_print_readings(void) static char *profile_target_name(int tgt_fd) { - struct bpf_prog_info_linear *info_linear; - struct bpf_func_info *func_info; + struct bpf_func_info func_info; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); const struct btf_type *t; + __u32 func_info_rec_size; struct btf *btf = NULL; char *name = NULL; + int err; - info_linear = bpf_program__get_prog_info_linear( - tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); - if (IS_ERR_OR_NULL(info_linear)) { - p_err("failed to get info_linear for prog FD %d", tgt_fd); - return NULL; + err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); + if (err) { + p_err("failed to bpf_obj_get_info_by_fd for prog FD %d", tgt_fd); + goto out; } - if (info_linear->info.btf_id == 0) { + if (info.btf_id == 0) { p_err("prog FD %d doesn't have valid btf", tgt_fd); goto out; } - btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + func_info_rec_size = info.func_info_rec_size; + if (info.nr_func_info == 0) { + p_err("bpf_obj_get_info_by_fd for prog FD %d found 0 func_info", tgt_fd); + goto out; + } + + memset(&info, 0, sizeof(info)); + info.nr_func_info = 1; + info.func_info_rec_size = func_info_rec_size; + info.func_info = ptr_to_u64(&func_info); + + err = bpf_obj_get_info_by_fd(tgt_fd, &info, &info_len); + if (err) { + p_err("failed to get func_info for prog FD %d", tgt_fd); + goto out; + } + + btf = btf__load_from_kernel_by_id(info.btf_id); if (libbpf_get_error(btf)) { p_err("failed to load btf for prog FD %d", tgt_fd); goto out; } - func_info = u64_to_ptr(info_linear->info.func_info); - t = btf__type_by_id(btf, func_info[0].type_id); + t = btf__type_by_id(btf, func_info.type_id); if (!t) { p_err("btf %d doesn't have type %d", - info_linear->info.btf_id, func_info[0].type_id); + 
info.btf_id, func_info.type_id); goto out; } name = strdup(btf__name_by_offset(btf, t->name_off)); out: btf__free(btf); - free(info_linear); return name; } diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c index d9b420972934..f70702fcb224 100644 --- a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -71,8 +71,8 @@ int iter(struct bpf_iter__task_file *ctx) e.pid = task->tgid; e.id = get_obj_id(file->private_data, obj_type); - bpf_probe_read_kernel(&e.comm, sizeof(e.comm), - task->group_leader->comm); + bpf_probe_read_kernel_str(&e.comm, sizeof(e.comm), + task->group_leader->comm); bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); return 0; diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index ab2d2290569a..2f693b082bdb 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -32,7 +32,7 @@ static const struct btf *get_btf_vmlinux(void) return btf_vmlinux; btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(btf_vmlinux)) + if (libbpf_get_error(btf_vmlinux)) p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y"); return btf_vmlinux; @@ -45,7 +45,7 @@ static const char *get_kern_struct_ops_name(const struct bpf_map_info *info) const char *st_ops_name; kern_btf = get_btf_vmlinux(); - if (IS_ERR(kern_btf)) + if (libbpf_get_error(kern_btf)) return "<btf_vmlinux_not_found>"; t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id); @@ -63,7 +63,7 @@ static __s32 get_map_info_type_id(void) return map_info_type_id; kern_btf = get_btf_vmlinux(); - if (IS_ERR(kern_btf)) { + if (libbpf_get_error(kern_btf)) { map_info_type_id = PTR_ERR(kern_btf); return map_info_type_id; } @@ -252,7 +252,7 @@ static struct res do_one_id(const char *id_str, work_func func, void *data, } fd = bpf_map_get_fd_by_id(id); - if (fd == -1) { + if (fd < 0) { p_err("can't get map by id (%lu): %s", id, strerror(errno)); res.nr_errs++; return res; @@ 
-415,7 +415,7 @@ static int do_dump(int argc, char **argv) } kern_btf = get_btf_vmlinux(); - if (IS_ERR(kern_btf)) + if (libbpf_get_error(kern_btf)) return -1; if (!json_output) { @@ -479,7 +479,7 @@ static int do_unregister(int argc, char **argv) static int do_register(int argc, char **argv) { - struct bpf_object_load_attr load_attr = {}; + LIBBPF_OPTS(bpf_object_open_opts, open_opts); const struct bpf_map_def *def; struct bpf_map_info info = {}; __u32 info_len = sizeof(info); @@ -494,18 +494,17 @@ static int do_register(int argc, char **argv) file = GET_ARG(); - obj = bpf_object__open(file); - if (IS_ERR_OR_NULL(obj)) + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + open_opts.kernel_log_level = 1 + 2 + 4; + + obj = bpf_object__open_file(file, &open_opts); + if (libbpf_get_error(obj)) return -1; set_max_rlimit(); - load_attr.obj = obj; - if (verifier_logs) - /* log_level1 + log_level2 + stats, but not stable UAPI */ - load_attr.log_level = 1 + 2 + 4; - - if (bpf_object__load_xattr(&load_attr)) { + if (bpf_object__load(obj)) { bpf_object__close(obj); return -1; } @@ -516,7 +515,7 @@ static int do_register(int argc, char **argv) continue; link = bpf_map__attach_struct_ops(map); - if (IS_ERR(link)) { + if (libbpf_get_error(link)) { p_err("can't register struct_ops %s: %s", bpf_map__name(map), strerror(-PTR_ERR(link))); @@ -596,7 +595,7 @@ int do_struct_ops(int argc, char **argv) err = cmd_select(cmds, argc, argv, do_help); - if (!IS_ERR(btf_vmlinux)) + if (!libbpf_get_error(btf_vmlinux)) btf__free(btf_vmlinux); return err; diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 751643f860b2..9ddeca947635 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -19,6 +19,7 @@ CC = $(HOSTCC) LD = $(HOSTLD) ARCH = $(HOSTARCH) RM ?= rm +CROSS_COMPILE = OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ diff --git a/tools/bpf/resolve_btfids/main.c 
b/tools/bpf/resolve_btfids/main.c index 73409e27be01..5d26f3c6f918 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -168,7 +168,7 @@ static struct btf_id *btf_id__find(struct rb_root *root, const char *name) return NULL; } -static struct btf_id* +static struct btf_id * btf_id__add(struct rb_root *root, char *name, bool unique) { struct rb_node **p = &root->rb_node; @@ -732,7 +732,8 @@ int main(int argc, const char **argv) if (obj.efile.idlist_shndx == -1 || obj.efile.symbols_shndx == -1) { pr_debug("Cannot find .BTF_ids or symbols sections, nothing to do\n"); - return 0; + err = 0; + goto out; } if (symbols_collect(&obj)) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index 8791d0e2762b..da6de16a3dfb 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -12,7 +12,7 @@ BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a BPF_DESTDIR := $(BPFOBJ_OUTPUT) BPF_INCLUDE := $(BPF_DESTDIR)/include INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi) -CFLAGS := -g -Wall +CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS) # Try to detect best kernel BTF source KERNEL_REL := $(shell uname -r) @@ -88,4 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) \ - CC=$(HOSTCC) LD=$(HOSTLD) + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index d89715844952..2414cc764461 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -123,7 +123,6 @@ int main(int argc, char **argv) .parser = parse_arg, .doc = argp_program_doc, }; - struct perf_buffer_opts pb_opts; struct perf_buffer *pb = NULL; struct runqslower_bpf *obj; int err; @@ -165,9 +164,8 @@ int main(int argc, char **argv) printf("Tracing run queue latency higher than %llu us\n", 
env.min_us); printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); - pb_opts.sample_cb = handle_event; - pb_opts.lost_cb = handle_lost_events; - pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, + handle_event, handle_lost_events, NULL, NULL); err = libbpf_get_error(pb); if (err) { pb = NULL; diff --git a/tools/build/Build.include b/tools/build/Build.include index 2cf3b1bde86e..c2a95ab47379 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX ### ## HOSTCC C flags -host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) +host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) # output directory for tests below TMPOUT = .tmp_$$$$ diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index 82070eadfc07..727d22e34a6e 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -14,6 +14,12 @@ # define __NR_bpf 349 # elif defined(__s390__) # define __NR_bpf 351 +# elif defined(__mips__) && defined(_ABIO32) +# define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +# define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +# define __NR_bpf 5315 # else # error __NR_bpf not defined. libbpf does not support your arch. 
# endif diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 0076437f6e3f..b94a16ba5c6c 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -279,6 +279,7 @@ static void print_event(struct iio_event_data *event) printf(", direction: %s", iio_ev_dir_text[dir]); printf("\n"); + fflush(stdout); } /* Enable or disable events in sysfs if the knob is available */ diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h index ad6fa21d977b..38edaa08f862 100644 --- a/tools/include/linux/hash.h +++ b/tools/include/linux/hash.h @@ -62,10 +62,7 @@ static inline u32 __hash_32_generic(u32 val) return val * GOLDEN_RATIO_32; } -#ifndef HAVE_ARCH_HASH_32 -#define hash_32 hash_32_generic -#endif -static inline u32 hash_32_generic(u32 val, unsigned int bits) +static inline u32 hash_32(u32 val, unsigned int bits) { /* High bits are more random, so use them. */ return __hash_32(val) >> (32 - bits); diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 3e8df500cfbd..9701e8307db0 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -92,7 +92,9 @@ int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); int scnprintf(char * buf, size_t size, const char * fmt, ...); int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); +#ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) +#endif #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 3430667b0d24..c1c285fe494a 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -265,12 +265,17 @@ struct stat { * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively * - the system call is performed by calling the syscall instruction * - syscall return comes in rax - * - rcx and r8..r11 may be clobbered, others are preserved. 
+ * - rcx and r11 are clobbered, others are preserved. * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. * - the syscall number is always specified last in order to allow to force * some registers before (gcc refuses a %-register at the last position). + * - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1 + * Calling Conventions. + * + * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI + * */ #define my_syscall0(num) \ @@ -280,9 +285,9 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -295,10 +300,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -312,10 +317,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -330,10 +335,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ "0"(_num) \ - : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -349,10 +354,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ "0"(_num) \ - : "rcx", "r8", "r9", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -369,10 +374,10 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : 
"=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "0"(_num) \ - : "rcx", "r9", "r11", "memory", "cc" \ + : "rcx", "r11", "memory", "cc" \ ); \ _ret; \ }) @@ -390,7 +395,7 @@ struct stat { \ asm volatile ( \ "syscall\n" \ - : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "=a"(_ret) \ : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ "r"(_arg6), "0"(_num) \ : "rcx", "r11", "memory", "cc" \ @@ -399,17 +404,23 @@ struct stat { }) /* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %rdi\n" // argc (first arg, %rdi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when - "sub $8, %rsp\n" // entering the callee + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "call main\n" // main() returns the status code, we'll exit with it. - "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits - "mov $60, %rax\n" // NR_exit == 60 + "mov %eax, %edi\n" // retrieve exit code (32 bit) + "mov $60, %eax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return ""); @@ -577,20 +588,28 @@ struct sys_stat_struct { }) /* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. 
+ * 2) The deepest stack frame should be set to zero + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %eax\n" // argc (first arg, %eax) "mov %esp, %ebx\n" // argv[] (second arg, %ebx) "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) "push %ecx\n" // push all registers on the stack so that we "push %ebx\n" // support both regparm and plain stack modes "push %eax\n" "call main\n" // main() returns the status code in %eax - "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now "hlt\n" // ensure it does not ""); @@ -774,7 +793,6 @@ asm(".section .text\n" "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits "movs r7, $1\n" // NR_exit == 1 "svc $0x00\n" ""); @@ -971,7 +989,6 @@ asm(".section .text\n" "add x2, x2, x1\n" // + argv "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee "bl main\n" // main() returns the status code, we'll exit with it. - "and x0, x0, 0xff\n" // limit exit code to 8 bits "mov x8, 93\n" // NR_exit == 93 "svc #0\n" ""); @@ -1176,7 +1193,7 @@ asm(".section .text\n" "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! "jal main\n" // main() returns the status code, we'll exit with it. 
"nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "move $a0, $v0\n" // retrieve 32-bit exit code from v0 "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" @@ -1374,7 +1391,6 @@ asm(".section .text\n" "add a2,a2,a1\n" // + argv "andi sp,a1,-16\n" // sp must be 16-byte aligned "call main\n" // main() returns the status code, we'll exit with it. - "andi a0, a0, 0xff\n" // limit exit code to 8 bits "li a7, 93\n" // NR_exit == 93 "ecall\n" ""); @@ -1556,6 +1572,12 @@ pid_t sys_getpid(void) } static __attribute__((unused)) +pid_t sys_gettid(void) +{ + return my_syscall0(__NR_gettid); +} + +static __attribute__((unused)) int sys_gettimeofday(struct timeval *tv, struct timezone *tz) { return my_syscall2(__NR_gettimeofday, tv, tz); @@ -2014,6 +2036,18 @@ pid_t getpid(void) } static __attribute__((unused)) +pid_t gettid(void) +{ + pid_t ret = sys_gettid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) int gettimeofday(struct timeval *tv, struct timezone *tz) { int ret = sys_gettimeofday(tv, tz); diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 3b810b53ba8b..642808520d92 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1096,6 +1096,24 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Planes are valid until one has a + * zero handle -- this can be used to compute the number of planes. 
+ * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) /* diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ba5af15e25f5..b0383d371b9a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1342,8 +1342,10 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; - __u32 :32; /* pad */ + __u32 core_relo_cnt; /* number of bpf_core_relo */ __aligned_u64 fd_array; /* array of FDs */ + __aligned_u64 core_relos; + __u32 core_relo_rec_size; /* sizeof(struct bpf_core_relo) */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1744,7 +1746,7 @@ union bpf_attr { * if the maximum number of tail calls has been reached for this * chain of programs. This limit is defined in the kernel by the * macro **MAX_TAIL_CALL_CNT** (not accessible to user space), - * which is currently set to 32. + * which is currently set to 33. * Return * 0 on success, or a negative error in case of failure. * @@ -4938,6 +4940,84 @@ union bpf_attr { * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. + * + * long bpf_find_vma(struct task_struct *task, u64 addr, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * Find vma of *task* that contains *addr*, call *callback_fn* + * function with *task*, *vma*, and *callback_ctx*. + * The *callback_fn* should be a static function and + * the *callback_ctx* should be a pointer to the stack. + * The *flags* is used to control certain aspects of the helper. 
+ * Currently, the *flags* must be 0. + * + * The expected callback signature is + * + * long (\*callback_fn)(struct task_struct \*task, struct vm_area_struct \*vma, void \*callback_ctx); + * + * Return + * 0 on success. + * **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*. + * **-EBUSY** if failed to try lock mmap_lock. + * **-EINVAL** for invalid **flags**. + * + * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_loops**, call **callback_fn** function + * with **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. Currently, nr_loops is + * limited to 1 << 23 (~8 million) loops. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the loop. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * loop. If return value is 1, the helper will skip the rest of + * the loops and return. Other return values are not used now, + * and will be rejected by the verifier. + * + * Return + * The number of loops performed, **-EINVAL** for invalid **flags**, + * **-E2BIG** if **nr_loops** exceeds the maximum number of loops. + * + * long bpf_strncmp(const char *s1, u32 s1_sz, const char *s2) + * Description + * Do strncmp() between **s1** and **s2**. **s1** doesn't need + * to be null-terminated and **s1_sz** is the maximum storage + * size of **s1**. **s2** must be a read-only string. + * Return + * An integer less than, equal to, or greater than zero + * if the first **s1_sz** bytes of **s1** is found to be + * less than, to match, or be greater than **s2**. 
+ * + * long bpf_get_func_arg(void *ctx, u32 n, u64 *value) + * Description + * Get **n**-th argument (zero based) of the traced function (for tracing programs) + * returned in **value**. + * + * Return + * 0 on success. + * **-EINVAL** if n >= arguments count of traced function. + * + * long bpf_get_func_ret(void *ctx, u64 *value) + * Description + * Get return value of the traced function (for tracing programs) + * in **value**. + * + * Return + * 0 on success. + * **-EOPNOTSUPP** for tracing programs other than BPF_TRACE_FEXIT or BPF_MODIFY_RETURN. + * + * long bpf_get_func_arg_cnt(void *ctx) + * Description + * Get number of arguments of the traced function (for tracing programs). + * + * Return + * The number of arguments of the traced function. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5120,6 +5200,12 @@ union bpf_attr { FN(trace_vprintk), \ FN(skc_to_unix_sock), \ FN(kallsyms_lookup_name), \ + FN(find_vma), \ + FN(loop), \ + FN(strncmp), \ + FN(get_func_arg), \ + FN(get_func_ret), \ + FN(get_func_arg_cnt), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -6296,6 +6382,7 @@ struct bpf_sk_lookup { __u32 local_ip4; /* Network byte order */ __u32 local_ip6[4]; /* Network byte order */ __u32 local_port; /* Host byte order */ + __u32 ingress_ifindex; /* The arriving interface. Determined by inet_iif. */ }; /* @@ -6328,4 +6415,78 @@ enum { BTF_F_ZERO = (1ULL << 3), }; +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. It is emitted by llvm and passed to + * libbpf and later to the kernel. 
+ */ +enum bpf_core_relo_kind { + BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */ + BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + +/* + * "struct bpf_core_relo" is used to pass relocation data form LLVM to libbpf + * and from libbpf to the kernel. + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. 
+ * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * - kind - one of enum bpf_core_relo_kind; + * + * Example: + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int *x = &s->a; // encoded as "0:0" (a is field #0) + * int *y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int *z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index deb12f755f0f..b0d8fea1951d 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -43,7 +43,7 @@ struct btf_type { * "size" tells the size of the type it is describing. * * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC, FUNC_PROTO, VAR and DECL_TAG. + * FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. * "type" is a type_id referring to another type. 
*/ union { @@ -75,6 +75,7 @@ enum { BTF_KIND_DATASEC = 15, /* Section */ BTF_KIND_FLOAT = 16, /* Floating point */ BTF_KIND_DECL_TAG = 17, /* Decl Tag */ + BTF_KIND_TYPE_TAG = 18, /* Type Tag */ NR_BTF_KINDS, BTF_KIND_MAX = NR_BTF_KINDS - 1, diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index eebd3894fe89..6218f93f5c1a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -347,6 +347,7 @@ enum { */ IFLA_PARENT_DEV_NAME, IFLA_PARENT_DEV_BUS_NAME, + IFLA_GRO_MAX_SIZE, __IFLA_MAX }; @@ -858,6 +859,7 @@ enum { IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, IFLA_BOND_AD_LACP_ACTIVE, + IFLA_BOND_MISSED_MAX, __IFLA_BOND_MAX, }; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index bd8860eeb291..4cd39aaccbe7 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1332,7 +1332,10 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -/* 2-7 available */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index b393b5e82380..f947b61b2107 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -84,11 +84,13 @@ else endif # Append required CFLAGS +override CFLAGS += -std=gnu89 override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum override CFLAGS += -Werror -Wall override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC @@ -161,7 +163,7 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h 
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT) - $(QUIET_LINK)$(CC) $(LDFLAGS) \ + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \ --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 725701235fd8..550b4cbb6c99 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,6 +28,9 @@ #include <asm/unistd.h> #include <errno.h> #include <linux/bpf.h> +#include <linux/filter.h> +#include <limits.h> +#include <sys/resource.h> #include "bpf.h" #include "libbpf.h" #include "libbpf_internal.h" @@ -49,6 +52,12 @@ # define __NR_bpf 351 # elif defined(__arc__) # define __NR_bpf 280 +# elif defined(__mips__) && defined(_ABIO32) +# define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +# define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +# define __NR_bpf 5315 # else # error __NR_bpf not defined. libbpf does not support your arch. # endif @@ -74,158 +83,208 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, return ensure_good_fd(fd); } -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +#define PROG_LOAD_ATTEMPTS 5 + +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) { - int retries = 5; int fd; do { fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); - } while (fd < 0 && errno == EAGAIN && retries-- > 0); + } while (fd < 0 && errno == EAGAIN && --attempts > 0); return fd; } -int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) +/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to + * memcg-based memory accounting for BPF maps and progs. This was done in [0]. 
+ * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in + * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. + * + * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ + * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") + */ +int probe_memcg_account(void) +{ + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + struct bpf_insn insns[] = { + BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); + union bpf_attr attr; + int prog_fd; + + /* attempt loading freplace trying to use custom BTF */ + memset(&attr, 0, prog_load_attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insn_cnt; + attr.license = ptr_to_u64("GPL"); + + prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); + if (prog_fd >= 0) { + close(prog_fd); + return 1; + } + return 0; +} + +static bool memlock_bumped; +static rlim_t memlock_rlim = RLIM_INFINITY; + +int libbpf_set_memlock_rlim(size_t memlock_bytes) +{ + if (memlock_bumped) + return libbpf_err(-EBUSY); + + memlock_rlim = memlock_bytes; + return 0; +} + +int bump_rlimit_memlock(void) +{ + struct rlimit rlim; + + /* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ + if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) + return 0; + + /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ + if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + return 0; + + memlock_bumped = true; + + /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ + if (memlock_rlim == 0) + return 0; + + rlim.rlim_cur = rlim.rlim_max = memlock_rlim; + if (setrlimit(RLIMIT_MEMLOCK, &rlim)) + return -errno; + + return 0; +} + +int bpf_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + 
const struct bpf_map_create_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, map_extra); union bpf_attr attr; int fd; - memset(&attr, '\0', sizeof(attr)); - - attr.map_type = create_attr->map_type; - attr.key_size = create_attr->key_size; - attr.value_size = create_attr->value_size; - attr.max_entries = create_attr->max_entries; - attr.map_flags = create_attr->map_flags; - if (create_attr->name) - memcpy(attr.map_name, create_attr->name, - min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); - attr.numa_node = create_attr->numa_node; - attr.btf_fd = create_attr->btf_fd; - attr.btf_key_type_id = create_attr->btf_key_type_id; - attr.btf_value_type_id = create_attr->btf_value_type_id; - attr.map_ifindex = create_attr->map_ifindex; - if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) - attr.btf_vmlinux_value_type_id = - create_attr->btf_vmlinux_value_type_id; - else - attr.inner_map_fd = create_attr->inner_map_fd; - attr.map_extra = create_attr->map_extra; + bump_rlimit_memlock(); + + memset(&attr, 0, attr_sz); + + if (!OPTS_VALID(opts, bpf_map_create_opts)) + return libbpf_err(-EINVAL); + + attr.map_type = map_type; + if (map_name) + libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); + attr.btf_fd = OPTS_GET(opts, btf_fd, 0); + attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); + attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); + attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + + attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); + attr.map_flags = OPTS_GET(opts, map_flags, 0); + attr.map_extra = OPTS_GET(opts, map_extra, 0); + attr.numa_node = OPTS_GET(opts, numa_node, 0); + attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_create_map_xattr(const 
struct bpf_create_map_attr *create_attr) { - struct bpf_create_map_params p = {}; + LIBBPF_OPTS(bpf_map_create_opts, p); - p.map_type = create_attr->map_type; - p.key_size = create_attr->key_size; - p.value_size = create_attr->value_size; - p.max_entries = create_attr->max_entries; p.map_flags = create_attr->map_flags; - p.name = create_attr->name; p.numa_node = create_attr->numa_node; p.btf_fd = create_attr->btf_fd; p.btf_key_type_id = create_attr->btf_key_type_id; p.btf_value_type_id = create_attr->btf_value_type_id; p.map_ifindex = create_attr->map_ifindex; - if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) - p.btf_vmlinux_value_type_id = - create_attr->btf_vmlinux_value_type_id; + if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) + p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id; else p.inner_map_fd = create_attr->inner_map_fd; - return libbpf__bpf_create_map_xattr(&p); + return bpf_map_create(create_attr->map_type, create_attr->name, + create_attr->key_size, create_attr->value_size, + create_attr->max_entries, &p); } int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node) { - struct bpf_create_map_attr map_attr = {}; - - map_attr.name = name; - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; + LIBBPF_OPTS(bpf_map_create_opts, opts); + + opts.map_flags = map_flags; if (node >= 0) { - map_attr.numa_node = node; - map_attr.map_flags |= BPF_F_NUMA_NODE; + opts.numa_node = node; + opts.map_flags |= BPF_F_NUMA_NODE; } - return bpf_create_map_xattr(&map_attr); + return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); } int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags) { - struct bpf_create_map_attr map_attr = {}; + LIBBPF_OPTS(bpf_map_create_opts, opts, 
.map_flags = map_flags); - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; - - return bpf_create_map_xattr(&map_attr); + return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); } int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags) { - struct bpf_create_map_attr map_attr = {}; - - map_attr.name = name; - map_attr.map_type = map_type; - map_attr.map_flags = map_flags; - map_attr.key_size = key_size; - map_attr.value_size = value_size; - map_attr.max_entries = max_entries; + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); - return bpf_create_map_xattr(&map_attr); + return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts); } int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node) { - union bpf_attr attr; - int fd; - - memset(&attr, '\0', sizeof(attr)); - - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = 4; - attr.inner_map_fd = inner_map_fd; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - if (name) - memcpy(attr.map_name, name, - min(strlen(name), BPF_OBJ_NAME_LEN - 1)); + LIBBPF_OPTS(bpf_map_create_opts, opts); + opts.inner_map_fd = inner_map_fd; + opts.map_flags = map_flags; if (node >= 0) { - attr.map_flags |= BPF_F_NUMA_NODE; - attr.numa_node = node; + opts.map_flags |= BPF_F_NUMA_NODE; + opts.numa_node = node; } - fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); - return libbpf_err_errno(fd); + return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); } int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags) { - return bpf_create_map_in_map_node(map_type, name, 
key_size, - inner_map_fd, max_entries, map_flags, - -1); + LIBBPF_OPTS(bpf_map_create_opts, opts, + .inner_map_fd = inner_map_fd, + .map_flags = map_flags, + ); + + return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts); } static void * @@ -253,58 +312,95 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt, return info; } -int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) +DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) +int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts) { void *finfo = NULL, *linfo = NULL; + const char *func_info, *line_info; + __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; + __u32 func_info_rec_size, line_info_rec_size; + int fd, attempts; union bpf_attr attr; - int fd; + char *log_buf; + + bump_rlimit_memlock(); - if (!load_attr->log_buf != !load_attr->log_buf_sz) + if (!OPTS_VALID(opts, bpf_prog_load_opts)) return libbpf_err(-EINVAL); - if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) + attempts = OPTS_GET(opts, attempts, 0); + if (attempts < 0) return libbpf_err(-EINVAL); + if (attempts == 0) + attempts = PROG_LOAD_ATTEMPTS; memset(&attr, 0, sizeof(attr)); - attr.prog_type = load_attr->prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - if (load_attr->attach_prog_fd) - attr.attach_prog_fd = load_attr->attach_prog_fd; - else - attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd; - attr.attach_btf_id = load_attr->attach_btf_id; + attr.prog_type = prog_type; + attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = load_attr->kern_version; + attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); + attr.prog_flags = OPTS_GET(opts, prog_flags, 0); + attr.prog_ifindex = OPTS_GET(opts, 
prog_ifindex, 0); + attr.kern_version = OPTS_GET(opts, kern_version, 0); - attr.insn_cnt = (__u32)load_attr->insn_cnt; - attr.insns = ptr_to_u64(load_attr->insns); - attr.license = ptr_to_u64(load_attr->license); + if (prog_name) + libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); + attr.license = ptr_to_u64(license); - attr.log_level = load_attr->log_level; - if (attr.log_level) { - attr.log_buf = ptr_to_u64(load_attr->log_buf); - attr.log_size = load_attr->log_buf_sz; - } + if (insn_cnt > UINT_MAX) + return libbpf_err(-E2BIG); + + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)insn_cnt; + + attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); + attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); + + if (attach_prog_fd && attach_btf_obj_fd) + return libbpf_err(-EINVAL); + + attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); + if (attach_prog_fd) + attr.attach_prog_fd = attach_prog_fd; + else + attr.attach_btf_obj_fd = attach_btf_obj_fd; - attr.prog_btf_fd = load_attr->prog_btf_fd; - attr.prog_flags = load_attr->prog_flags; + log_buf = OPTS_GET(opts, log_buf, NULL); + log_size = OPTS_GET(opts, log_size, 0); + log_level = OPTS_GET(opts, log_level, 0); - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - attr.func_info = ptr_to_u64(load_attr->func_info); + if (!!log_buf != !!log_size) + return libbpf_err(-EINVAL); + if (log_level > (4 | 2 | 1)) + return libbpf_err(-EINVAL); + if (log_level && !log_buf) + return libbpf_err(-EINVAL); - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - attr.line_info = ptr_to_u64(load_attr->line_info); - attr.fd_array = ptr_to_u64(load_attr->fd_array); + func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); + func_info = OPTS_GET(opts, func_info, NULL); + attr.func_info_rec_size = func_info_rec_size; + attr.func_info = ptr_to_u64(func_info); + attr.func_info_cnt = OPTS_GET(opts, 
func_info_cnt, 0); - if (load_attr->name) - memcpy(attr.prog_name, load_attr->name, - min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); + line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); + line_info = OPTS_GET(opts, line_info, NULL); + attr.line_info_rec_size = line_info_rec_size; + attr.line_info = ptr_to_u64(line_info); + attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); + + attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_size; + attr.log_level = log_level; + } - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); if (fd >= 0) return fd; @@ -314,11 +410,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) */ while (errno == E2BIG && (!finfo || !linfo)) { if (!finfo && attr.func_info_cnt && - attr.func_info_rec_size < load_attr->func_info_rec_size) { + attr.func_info_rec_size < func_info_rec_size) { /* try with corrected func info records */ - finfo = alloc_zero_tailing_info(load_attr->func_info, - load_attr->func_info_cnt, - load_attr->func_info_rec_size, + finfo = alloc_zero_tailing_info(func_info, + attr.func_info_cnt, + func_info_rec_size, attr.func_info_rec_size); if (!finfo) { errno = E2BIG; @@ -326,13 +422,12 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) } attr.func_info = ptr_to_u64(finfo); - attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_rec_size = func_info_rec_size; } else if (!linfo && attr.line_info_cnt && - attr.line_info_rec_size < - load_attr->line_info_rec_size) { - linfo = alloc_zero_tailing_info(load_attr->line_info, - load_attr->line_info_cnt, - load_attr->line_info_rec_size, + attr.line_info_rec_size < line_info_rec_size) { + linfo = alloc_zero_tailing_info(line_info, + attr.line_info_cnt, + line_info_rec_size, attr.line_info_rec_size); if (!linfo) { errno = E2BIG; @@ -340,26 +435,27 @@ int 
libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) } attr.line_info = ptr_to_u64(linfo); - attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_rec_size = line_info_rec_size; } else { break; } - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); if (fd >= 0) goto done; } - if (load_attr->log_level || !load_attr->log_buf) - goto done; + if (log_level == 0 && log_buf) { + /* log_level == 0 with non-NULL log_buf requires retrying on error + * with log_level == 1 and log_buf/log_buf_size set, to get details of + * failure + */ + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_size; + attr.log_level = 1; - /* Try again with log */ - attr.log_buf = ptr_to_u64(load_attr->log_buf); - attr.log_size = load_attr->log_buf_sz; - attr.log_level = 1; - load_attr->log_buf[0] = 0; - - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + } done: /* free() doesn't affect errno, so we don't need to restore it */ free(finfo); @@ -367,17 +463,20 @@ done: return libbpf_err_errno(fd); } +__attribute__((alias("bpf_load_program_xattr2"))) int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz) + char *log_buf, size_t log_buf_sz); + +static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz) { - struct bpf_prog_load_params p = {}; + LIBBPF_OPTS(bpf_prog_load_opts, p); if (!load_attr || !log_buf != !log_buf_sz) return libbpf_err(-EINVAL); - p.prog_type = load_attr->prog_type; p.expected_attach_type = load_attr->expected_attach_type; - switch (p.prog_type) { + switch (load_attr->prog_type) { case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_LSM: p.attach_btf_id = load_attr->attach_btf_id; @@ -391,12 +490,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, p.prog_ifindex = load_attr->prog_ifindex; 
p.kern_version = load_attr->kern_version; } - p.insn_cnt = load_attr->insns_cnt; - p.insns = load_attr->insns; - p.license = load_attr->license; p.log_level = load_attr->log_level; p.log_buf = log_buf; - p.log_buf_sz = log_buf_sz; + p.log_size = log_buf_sz; p.prog_btf_fd = load_attr->prog_btf_fd; p.func_info_rec_size = load_attr->func_info_rec_size; p.func_info_cnt = load_attr->func_info_cnt; @@ -404,10 +500,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, p.line_info_rec_size = load_attr->line_info_rec_size; p.line_info_cnt = load_attr->line_info_cnt; p.line_info = load_attr->line_info; - p.name = load_attr->name; p.prog_flags = load_attr->prog_flags; - return libbpf__bpf_prog_load(&p); + return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, + load_attr->insns, load_attr->insns_cnt, &p); } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -426,7 +522,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, load_attr.license = license; load_attr.kern_version = kern_version; - return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); + return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz); } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -437,6 +533,8 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, union bpf_attr attr; int fd; + bump_rlimit_memlock(); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; @@ -449,7 +547,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - fd = sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS); return libbpf_err_errno(fd); } @@ -593,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, return libbpf_err_errno(ret); } -int 
bpf_map_delete_batch(int fd, void *keys, __u32 *count, +int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, - NULL, keys, NULL, count, opts); + NULL, (void *)keys, NULL, count, opts); } int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, @@ -617,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, count, opts); } -int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, +int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, const struct bpf_map_batch_opts *opts) { return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, - keys, values, count, opts); + (void *)keys, (void *)values, count, opts); } int bpf_obj_pin(int fd, const char *pathname) @@ -1028,24 +1126,67 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) return libbpf_err_errno(fd); } -int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, - bool do_log) +int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts) { - union bpf_attr attr = {}; + const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); + union bpf_attr attr; + char *log_buf; + size_t log_size; + __u32 log_level; int fd; - attr.btf = ptr_to_u64(btf); + bump_rlimit_memlock(); + + memset(&attr, 0, attr_sz); + + if (!OPTS_VALID(opts, bpf_btf_load_opts)) + return libbpf_err(-EINVAL); + + log_buf = OPTS_GET(opts, log_buf, NULL); + log_size = OPTS_GET(opts, log_size, 0); + log_level = OPTS_GET(opts, log_level, 0); + + if (log_size > UINT_MAX) + return libbpf_err(-EINVAL); + if (log_size && !log_buf) + return libbpf_err(-EINVAL); + + attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + /* log_level == 0 and log_buf != NULL means "try loading without + * log_buf, but retry with log_buf and log_level=1 on error", which is + * 
consistent across low-level and high-level BTF and program loading + * APIs within libbpf and provides a sensible behavior in practice + */ + if (log_level) { + attr.btf_log_buf = ptr_to_u64(log_buf); + attr.btf_log_size = (__u32)log_size; + attr.btf_log_level = log_level; + } -retry: - if (do_log && log_buf && log_buf_size) { - attr.btf_log_level = 1; - attr.btf_log_size = log_buf_size; + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); + if (fd < 0 && log_buf && log_level == 0) { attr.btf_log_buf = ptr_to_u64(log_buf); + attr.btf_log_size = (__u32)log_size; + attr.btf_log_level = 1; + fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); } + return libbpf_err_errno(fd); +} - fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); +int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) +{ + LIBBPF_OPTS(bpf_btf_load_opts, opts); + int fd; + +retry: + if (do_log && log_buf && log_buf_size) { + opts.log_buf = log_buf; + opts.log_size = log_buf_size; + opts.log_level = 1; + } + fd = bpf_btf_load(btf, btf_size, &opts); if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; goto retry; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6fffb3cdf39b..14e0d97ad2cf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -29,11 +29,38 @@ #include <stdint.h> #include "libbpf_common.h" +#include "libbpf_legacy.h" #ifdef __cplusplus extern "C" { #endif +int libbpf_set_memlock_rlim(size_t memlock_bytes); + +struct bpf_map_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + __u32 btf_fd; + __u32 btf_key_type_id; + __u32 btf_value_type_id; + __u32 btf_vmlinux_value_type_id; + + __u32 inner_map_fd; + __u32 map_flags; + __u64 map_extra; + + __u32 numa_node; + __u32 map_ifindex; +}; +#define bpf_map_create_opts__last_field map_ifindex + +LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + const 
struct bpf_map_create_opts *opts); + struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -52,25 +79,95 @@ struct bpf_create_map_attr { }; }; -LIBBPF_API int -bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") +LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags, int node); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name, int key_size, int value_size, int max_entries, __u32 map_flags); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags, int node); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, int key_size, int inner_map_fd, int max_entries, __u32 map_flags); +struct bpf_prog_load_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + /* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns + * -EAGAIN. This field determines how many attempts libbpf has to + * make. If not specified, libbpf will use default value of 5. 
+ */ + int attempts; + + enum bpf_attach_type expected_attach_type; + __u32 prog_btf_fd; + __u32 prog_flags; + __u32 prog_ifindex; + __u32 kern_version; + + __u32 attach_btf_id; + __u32 attach_prog_fd; + __u32 attach_btf_obj_fd; + + const int *fd_array; + + /* .BTF.ext func info data */ + const void *func_info; + __u32 func_info_cnt; + __u32 func_info_rec_size; + + /* .BTF.ext line info data */ + const void *line_info; + __u32 line_info_cnt; + __u32 line_info_rec_size; + + /* verifier log options */ + __u32 log_level; + __u32 log_size; + char *log_buf; +}; +#define bpf_prog_load_opts__last_field log_buf + +LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts); +/* this "specialization" should go away in libbpf 1.0 */ +LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, + const char *prog_name, const char *license, + const struct bpf_insn *insns, size_t insn_cnt, + const struct bpf_prog_load_opts *opts); + +/* This is an elaborate way to not conflict with deprecated bpf_prog_load() + * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. + * With this approach, if someone is calling bpf_prog_load() with + * 4 arguments, they will use the deprecated API, which keeps backwards + * compatibility (both source code and binary). If bpf_prog_load() is called + * with 6 arguments, though, it gets redirected to __bpf_prog_load. + * So looking forward to libbpf 1.0 when this hack will be gone and + * __bpf_prog_load() will be called just bpf_prog_load(). + */ +#ifndef bpf_prog_load +#define bpf_prog_load(...) 
___libbpf_overload(___bpf_prog_load, __VA_ARGS__) +#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ + bpf_prog_load_deprecated(file, type, pobj, prog_fd) +#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ + bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) +#endif /* bpf_prog_load */ + struct bpf_load_program_attr { enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; @@ -100,15 +197,18 @@ struct bpf_load_program_attr { /* Flags to direct loading requirements */ #define MAPS_RELAX_COMPAT 0x01 -/* Recommend log buffer size */ +/* Recommended log buffer size */ #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ -LIBBPF_API int -bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, - char *log_buf, size_t log_buf_sz); + +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") +LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, + char *log_buf, size_t log_buf_sz); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") LIBBPF_API int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, __u32 prog_flags, @@ -116,6 +216,23 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz, int log_level); +struct bpf_btf_load_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + + /* kernel log options */ + char *log_buf; + __u32 log_level; + __u32 log_size; +}; +#define bpf_btf_load_opts__last_field log_size + +LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, + const struct bpf_btf_load_opts *opts); + +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() 
instead") +LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, + __u32 log_buf_size, bool do_log); + LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); @@ -137,17 +254,128 @@ struct bpf_map_batch_opts { }; #define bpf_map_batch_opts__last_field flags -LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + +/** + * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple + * elements in a BPF map. + * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param count input and output parameter; on input **count** represents the + * number of elements in the map to delete in batch; + * on output if a non-EFAULT error is returned, **count** represents the number of deleted + * elements if the output **count** value is not equal to the input **count** value + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch deletion works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys, __u32 *count, const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. + * + * The parameter *in_batch* is the address of the first element in the batch to read. + * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent + * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate + * that the batched lookup starts from the beginning of the map. + * + * The *keys* and *values* are output parameters which must point to memory large enough to + * hold *count* items based on the key and value size of the map *map_fd*. The *keys* + * buffer must be of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. 
+ * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * indicate that the batched lookup starts from the beginning of the map. + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array large enough for *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read in batch; on output it's the number of elements that were + * successfully read. + * If a non-EFAULT error is returned, count will be set as the number of elements + * that were read before the error occurred. + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch lookup works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion + * of BPF map elements where each element is deleted after being retrieved. 
+ * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * get address of the first element in *out_batch* + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array of *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read and delete in batch; on output it represents the number of + * elements that were successfully read and deleted. + * If a non-**EFAULT** error code is returned and if the output **count** value + * is not equal to the input **count** value, up to **count** elements may + * have been deleted. + * If **EFAULT** is returned, up to *count* elements may have been deleted without + * being returned via the *keys* and *values* output parameters. + * @param opts options for configuring the way the batch lookup and delete works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch, void *keys, void *values, __u32 *count, const struct bpf_map_batch_opts *opts); -LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + +/** + * @brief **bpf_map_update_batch()** updates multiple elements in a map + * by specifying keys and their corresponding values. + * + * The *keys* and *values* parameters must point to memory large enough + * to hold *count* items based on the key and value size of the map. + * + * The *opts* parameter can be used to control how *bpf_map_update_batch()* + * should handle keys that either do or do not already exist in the map.
+ * In particular the *flags* parameter of *bpf_map_batch_opts* can be + * one of the following: + * + * Note that *count* is an input and output parameter, where on output it + * represents how many elements were successfully updated. Also note that if + * **EFAULT** is returned, *count* should not be trusted to be correct. + * + * **BPF_ANY** + * Create new elements or update existing. + * + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * + * **BPF_EXIST** + * Update existing elements. + * + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param values pointer to an array of *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to update in batch; on output if a non-EFAULT error is returned, + * **count** represents the number of updated elements if the output **count** + * value is not equal to the input **count** value. + * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch update works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count, const struct bpf_map_batch_opts *opts); @@ -243,8 +471,6 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt); LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); -LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, - __u32 log_buf_size, bool do_log); LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 6f3df004479b..223308931d55 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -3,6 +3,8 @@ #ifndef __BPF_GEN_INTERNAL_H #define __BPF_GEN_INTERNAL_H +#include "bpf.h" + struct ksym_relo_desc { const char *name; int kind; @@ -37,6 +39,8 @@ struct bpf_gen { int error; struct ksym_relo_desc *relos; int relo_cnt; + struct bpf_core_relo *core_relos; + int core_relo_cnt; char attach_target[128]; int attach_kind; struct ksym_desc *ksyms; @@ -49,13 +53,20 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps); void bpf_gen__free(struct bpf_gen *gen); void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); -struct bpf_prog_load_params; -void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); +void bpf_gen__map_create(struct bpf_gen *gen, + enum bpf_map_type map_type, const char 
*map_name, + __u32 key_size, __u32 value_size, __u32 max_entries, + struct bpf_map_create_opts *map_attr, int map_idx); +void bpf_gen__prog_load(struct bpf_gen *gen, + enum bpf_prog_type prog_type, const char *prog_name, + const char *license, struct bpf_insn *insns, size_t insn_cnt, + struct bpf_prog_load_opts *load_attr, int prog_idx); void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, bool is_typeless, int kind, int insn_idx); +void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); +void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx); #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index db05a5937105..90f56b0f585f 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -66,277 +66,204 @@ #if defined(__KERNEL__) || defined(__VMLINUX_H__) -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) 
-#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) +#define __PT_PARM1_REG di +#define __PT_PARM2_REG si +#define __PT_PARM3_REG dx +#define __PT_PARM4_REG cx +#define __PT_PARM5_REG r8 +#define __PT_RET_REG sp +#define __PT_FP_REG bp +#define __PT_RC_REG ax +#define __PT_SP_REG sp +#define __PT_IP_REG ip #else #ifdef __i386__ -/* i386 kernel is built with -mregparm=3 */ -#define PT_REGS_PARM1(x) ((x)->eax) -#define PT_REGS_PARM2(x) ((x)->edx) -#define PT_REGS_PARM3(x) ((x)->ecx) -#define PT_REGS_PARM4(x) 0 -#define PT_REGS_PARM5(x) 0 -#define PT_REGS_RET(x) ((x)->esp) -#define PT_REGS_FP(x) ((x)->ebp) -#define PT_REGS_RC(x) ((x)->eax) -#define PT_REGS_SP(x) ((x)->esp) -#define PT_REGS_IP(x) ((x)->eip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) -#define PT_REGS_PARM4_CORE(x) 0 -#define PT_REGS_PARM5_CORE(x) 0 -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) - -#else -#define PT_REGS_PARM1(x) ((x)->rdi) -#define PT_REGS_PARM2(x) ((x)->rsi) -#define PT_REGS_PARM3(x) ((x)->rdx) -#define PT_REGS_PARM4(x) ((x)->rcx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->rsp) -#define PT_REGS_FP(x) ((x)->rbp) -#define PT_REGS_RC(x) ((x)->rax) -#define PT_REGS_SP(x) ((x)->rsp) -#define PT_REGS_IP(x) ((x)->rip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), 
rax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) - -#endif -#endif +#define __PT_PARM1_REG eax +#define __PT_PARM2_REG edx +#define __PT_PARM3_REG ecx +/* i386 kernel is built with -mregparm=3 */ +#define __PT_PARM4_REG __unsupported__ +#define __PT_PARM5_REG __unsupported__ +#define __PT_RET_REG esp +#define __PT_FP_REG ebp +#define __PT_RC_REG eax +#define __PT_SP_REG esp +#define __PT_IP_REG eip + +#else /* __i386__ */ + +#define __PT_PARM1_REG rdi +#define __PT_PARM2_REG rsi +#define __PT_PARM3_REG rdx +#define __PT_PARM4_REG rcx +#define __PT_PARM5_REG r8 +#define __PT_RET_REG rsp +#define __PT_FP_REG rbp +#define __PT_RC_REG rax +#define __PT_SP_REG rsp +#define __PT_IP_REG rip + +#endif /* __i386__ */ + +#endif /* __KERNEL__ || __VMLINUX_H__ */ #elif defined(bpf_target_s390) /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_S390 const volatile user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) -#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) -#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) -#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) -#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) -#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) -#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) 
-#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) +#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) +#define __PT_PARM1_REG gprs[2] +#define __PT_PARM2_REG gprs[3] +#define __PT_PARM3_REG gprs[4] +#define __PT_PARM4_REG gprs[5] +#define __PT_PARM5_REG gprs[6] +#define __PT_RET_REG gprs[14] +#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG gprs[2] +#define __PT_SP_REG gprs[15] +#define __PT_IP_REG psw.addr #elif defined(bpf_target_arm) -#define PT_REGS_PARM1(x) ((x)->uregs[0]) -#define PT_REGS_PARM2(x) ((x)->uregs[1]) -#define PT_REGS_PARM3(x) ((x)->uregs[2]) -#define PT_REGS_PARM4(x) ((x)->uregs[3]) -#define PT_REGS_PARM5(x) ((x)->uregs[4]) -#define PT_REGS_RET(x) ((x)->uregs[14]) -#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->uregs[0]) -#define PT_REGS_SP(x) ((x)->uregs[13]) -#define PT_REGS_IP(x) ((x)->uregs[12]) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) +#define __PT_PARM1_REG uregs[0] +#define __PT_PARM2_REG uregs[1] +#define __PT_PARM3_REG uregs[2] +#define __PT_PARM4_REG uregs[3] +#define
__PT_PARM5_REG uregs[4] +#define __PT_RET_REG uregs[14] +#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG uregs[0] +#define __PT_SP_REG uregs[13] +#define __PT_IP_REG uregs[12] #elif defined(bpf_target_arm64) /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_ARM64 const volatile struct user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) -#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) -#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) -#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) -#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) -#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) +#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) +#define __PT_PARM1_REG regs[0] +#define __PT_PARM2_REG regs[1] +#define __PT_PARM3_REG regs[2] +#define __PT_PARM4_REG regs[3] +#define __PT_PARM5_REG regs[4] 
+#define __PT_RET_REG regs[30] +#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG regs[0] +#define __PT_SP_REG sp +#define __PT_IP_REG pc #elif defined(bpf_target_mips) -#define PT_REGS_PARM1(x) ((x)->regs[4]) -#define PT_REGS_PARM2(x) ((x)->regs[5]) -#define PT_REGS_PARM3(x) ((x)->regs[6]) -#define PT_REGS_PARM4(x) ((x)->regs[7]) -#define PT_REGS_PARM5(x) ((x)->regs[8]) -#define PT_REGS_RET(x) ((x)->regs[31]) -#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[2]) -#define PT_REGS_SP(x) ((x)->regs[29]) -#define PT_REGS_IP(x) ((x)->cp0_epc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) +#define __PT_PARM1_REG regs[4] +#define __PT_PARM2_REG regs[5] +#define __PT_PARM3_REG regs[6] +#define __PT_PARM4_REG regs[7] +#define __PT_PARM5_REG regs[8] +#define __PT_RET_REG regs[31] +#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG regs[2] +#define __PT_SP_REG regs[29] +#define __PT_IP_REG cp0_epc #elif defined(bpf_target_powerpc) -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) -#define 
PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) +#define __PT_PARM1_REG gpr[3] +#define __PT_PARM2_REG gpr[4] +#define __PT_PARM3_REG gpr[5] +#define __PT_PARM4_REG gpr[6] +#define __PT_PARM5_REG gpr[7] +#define __PT_RET_REG regs[31] +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG gpr[3] +#define __PT_SP_REG sp +#define __PT_IP_REG nip #elif defined(bpf_target_sparc) -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) - +#define __PT_PARM1_REG u_regs[UREG_I0] +#define __PT_PARM2_REG u_regs[UREG_I1] +#define __PT_PARM3_REG u_regs[UREG_I2] +#define __PT_PARM4_REG u_regs[UREG_I3] +#define __PT_PARM5_REG u_regs[UREG_I4] +#define __PT_RET_REG u_regs[UREG_I7] +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG u_regs[UREG_I0] +#define __PT_SP_REG u_regs[UREG_FP] /* Should this also 
be a bpf_target check for the sparc case? */ #if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) +#define __PT_IP_REG tpc #else -#define PT_REGS_IP(x) ((x)->pc) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) +#define __PT_IP_REG pc #endif #elif defined(bpf_target_riscv) +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG a0 +#define __PT_PARM2_REG a1 +#define __PT_PARM3_REG a2 +#define __PT_PARM4_REG a3 +#define __PT_PARM5_REG a4 +#define __PT_RET_REG ra +#define __PT_FP_REG fp +#define __PT_RC_REG a5 +#define __PT_SP_REG sp +#define __PT_IP_REG epc + +#endif + +#if defined(bpf_target_defined) + struct pt_regs; -#define PT_REGS_RV const volatile struct user_regs_struct -#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) -#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) -#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) -#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) -#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) -#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) -#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) -#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) -#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) +/* allow some architectures to override
`struct pt_regs` */ +#ifndef __PT_REGS_CAST +#define __PT_REGS_CAST(x) (x) #endif +#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) +#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) +#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) +#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) +#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) +#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) +#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) +#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) +#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG) +#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG) + #if defined(bpf_target_powerpc) + #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP + #elif defined(bpf_target_sparc) + #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_defined) + +#else + #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ - ({ 
bpf_probe_read_kernel(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) + ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) + #endif -#if !defined(bpf_target_defined) +#else /* defined(bpf_target_defined) */ #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -363,7 +290,7 @@ struct pt_regs; #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) -#endif /* !defined(bpf_target_defined) */ +#endif /* defined(bpf_target_defined) */ #ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b @@ -375,25 +302,23 @@ struct pt_regs; #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N #endif #ifndef ___bpf_narg -#define ___bpf_narg(...) \ - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) #endif -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) 
___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) \ - ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) +#define ___bpf_ctx_cast(args...) ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) /* * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and @@ -426,19 +351,13 @@ ____##name(unsigned long long *ctx, ##args) struct pt_regs; -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) \ - ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) \ - ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) \ - ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) \ - ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) \ - ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) \ - ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) 
___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) /* * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for @@ -464,11 +383,9 @@ typeof(name(0)) name(struct pt_regs *ctx) \ static __attribute__((always_inline)) typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) \ - ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) -#define ___bpf_kretprobe_args(args...) \ - ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_args1(x) ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args(args...) 
___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) /* * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7e4c5586bd87..9aa19c89f758 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -299,6 +299,7 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: return base_size; case BTF_KIND_INT: return base_size + sizeof(__u32); @@ -349,6 +350,7 @@ static int btf_bswap_type_rest(struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: return 0; case BTF_KIND_INT: *(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); @@ -452,7 +454,7 @@ const struct btf *btf__base_btf(const struct btf *btf) } /* internal helper returning non-const pointer to a type */ -struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) +struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id) { if (type_id == 0) return &btf_void; @@ -608,6 +610,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_RESTRICT: case BTF_KIND_VAR: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: type_id = t->type; break; case BTF_KIND_ARRAY: @@ -649,6 +652,7 @@ int btf__align_of(const struct btf *btf, __u32 id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return btf__align_of(btf, t->type); case BTF_KIND_ARRAY: return btf__align_of(btf, btf_array(t)->type); @@ -1120,54 +1124,86 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load_into_kernel(struct btf *btf) +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) { - __u32 log_buf_size = 0, raw_size; - char *log_buf = NULL; + LIBBPF_OPTS(bpf_btf_load_opts, opts); + __u32 buf_sz = 0, 
raw_size; + char *buf = NULL, *tmp; void *raw_data; int err = 0; if (btf->fd >= 0) return libbpf_err(-EEXIST); + if (log_sz && !log_buf) + return libbpf_err(-EINVAL); -retry_load: - if (log_buf_size) { - log_buf = malloc(log_buf_size); - if (!log_buf) - return libbpf_err(-ENOMEM); - - *log_buf = 0; - } - + /* cache native raw data representation */ raw_data = btf_get_raw_data(btf, &raw_size, false); if (!raw_data) { err = -ENOMEM; goto done; } - /* cache native raw data representation */ btf->raw_size = raw_size; btf->raw_data = raw_data; - btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false); +retry_load: + /* if log_level is 0, we won't provide log_buf/log_size to the kernel, + * initially. Only if BTF loading fails, we bump log_level to 1 and + * retry, using either auto-allocated or custom log_buf. This way + * non-NULL custom log_buf provides a buffer just in case, but hopes + * for successful load and no need for log_buf. + */ + if (log_level) { + /* if caller didn't provide custom log_buf, we'll keep + * allocating our own progressively bigger buffers for BTF + * verification log + */ + if (!log_buf) { + buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2); + tmp = realloc(buf, buf_sz); + if (!tmp) { + err = -ENOMEM; + goto done; + } + buf = tmp; + buf[0] = '\0'; + } + + opts.log_buf = log_buf ? log_buf : buf; + opts.log_size = log_buf ? 
log_sz : buf_sz; + opts.log_level = log_level; + } + + btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { - if (!log_buf || errno == ENOSPC) { - log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, - log_buf_size << 1); - free(log_buf); + /* time to turn on verbose mode and try again */ + if (log_level == 0) { + log_level = 1; goto retry_load; } + /* only retry if caller didn't provide custom log_buf, but + * make sure we can never overflow buf_sz + */ + if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2) + goto retry_load; err = -errno; - pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (*log_buf) - pr_warn("%s\n", log_buf); - goto done; + pr_warn("BTF loading error: %d\n", err); + /* don't print out contents of custom log_buf */ + if (!log_buf && buf[0]) + pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); } done: - free(log_buf); + free(buf); return libbpf_err(err); } + +int btf__load_into_kernel(struct btf *btf) +{ + return btf_load_into_kernel(btf, NULL, 0, 0); +} + int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); int btf__fd(const struct btf *btf) @@ -2236,6 +2272,22 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) } /* + * Append new BTF_KIND_TYPE_TAG type with: + * - *value*, non-empty/non-NULL tag value; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. 
+ */ +int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) +{ + if (!value|| !value[0]) + return libbpf_err(-EINVAL); + + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); +} + +/* * Append new BTF_KIND_FUNC type with: * - *name*, non-empty/non-NULL name; * - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet; @@ -2711,15 +2763,11 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext); } -struct btf_ext *btf_ext__new(__u8 *data, __u32 size) +struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) { struct btf_ext *btf_ext; int err; - err = btf_ext_parse_hdr(data, size); - if (err) - return libbpf_err_ptr(err); - btf_ext = calloc(1, sizeof(struct btf_ext)); if (!btf_ext) return libbpf_err_ptr(-ENOMEM); @@ -2732,6 +2780,10 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); + err = btf_ext_parse_hdr(btf_ext->data, size); + if (err) + goto done; + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { err = -EINVAL; goto done; @@ -2846,8 +2898,7 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) struct btf_dedup; -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts); +static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts); static void btf_dedup_free(struct btf_dedup *d); static int btf_dedup_prep(struct btf_dedup *d); static int btf_dedup_strings(struct btf_dedup *d); @@ -2994,12 +3045,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d); * deduplicating structs/unions is described in greater details in comments for * `btf_dedup_is_equiv` function. 
*/ -int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts) + +DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) +int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts) { - struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + struct btf_dedup *d; int err; + if (!OPTS_VALID(opts, btf_dedup_opts)) + return libbpf_err(-EINVAL); + + d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); return libbpf_err(-EINVAL); @@ -3051,6 +3107,19 @@ done: return libbpf_err(err); } +COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) +int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) +{ + LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); + + if (unused_opts) { + pr_warn("please use new version of btf__dedup() that supports options\n"); + return libbpf_err(-ENOTSUP); + } + + return btf__dedup(btf, &opts); +} + #define BTF_UNPROCESSED_ID ((__u32)-1) #define BTF_IN_PROGRESS_ID ((__u32)-2) @@ -3163,8 +3232,7 @@ static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx) return k1 == k2; } -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts) +static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts) { struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; @@ -3173,13 +3241,11 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, if (!d) return ERR_PTR(-ENOMEM); - d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; - /* dedup_table_size is now used only to force collisions in tests */ - if (opts && opts->dedup_table_size == 1) + if (OPTS_GET(opts, force_collisions, false)) hash_fn = btf_dedup_collision_hash_fn; d->btf = btf; - d->btf_ext = btf_ext; + d->btf_ext = OPTS_GET(opts, btf_ext, NULL); d->dedup_table 
= hashmap__new(hash_fn, btf_dedup_equal_fn, NULL); if (IS_ERR(d->dedup_table)) { @@ -3443,8 +3509,8 @@ static long btf_hash_struct(struct btf_type *t) } /* - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type - * IDs. This check is performed during type graph equivalence check and + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced + * type IDs. This check is performed during type graph equivalence check and * referenced types equivalence is checked separately. */ static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) @@ -3625,6 +3691,7 @@ static int btf_dedup_prep(struct btf_dedup *d) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: h = btf_hash_common(t); break; case BTF_KIND_INT: @@ -3685,6 +3752,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_VAR: case BTF_KIND_DATASEC: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: return 0; case BTF_KIND_INT: @@ -3708,8 +3776,6 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) new_id = cand_id; break; } - if (d->opts.dont_resolve_fwds) - continue; if (btf_compat_enum(t, cand)) { if (btf_is_enum_fwd(t)) { /* resolve fwd to full enum */ @@ -3817,6 +3883,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) return btf_equal_array(t1, t2); } +/* Check if given two types are identical STRUCT/UNION definitions */ +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) +{ + const struct btf_member *m1, *m2; + struct btf_type *t1, *t2; + int n, i; + + t1 = btf_type_by_id(d->btf, id1); + t2 = btf_type_by_id(d->btf, id2); + + if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) + return false; + + if (!btf_shallow_equal_struct(t1, t2)) + return false; + + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { + if (m1->type != m2->type) + return 
false; + } + return true; +} + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -3928,6 +4019,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, hypot_type_id = d->hypot_map[canon_id]; if (hypot_type_id <= BTF_MAX_NR_TYPES) { + if (hypot_type_id == cand_id) + return 1; /* In some cases compiler will generate different DWARF types * for *identical* array type definitions and use them for * different fields within the *same* struct. This breaks type @@ -3936,8 +4029,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * types within a single CU. So work around that by explicitly * allowing identical array types here. */ - return hypot_type_id == cand_id || - btf_dedup_identical_arrays(d, hypot_type_id, cand_id); + if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) + return 1; + /* It turns out that similar situation can happen with + * struct/union sometimes, sigh... Handle the case where + * structs/unions are exactly the same, down to the referenced + * type IDs. Anything more complicated (e.g., if referenced + * types are different, but equivalent) is *way more* + * complicated and requires a many-to-many equivalence mapping. 
+ */ + if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) + return 1; + return 0; } if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) @@ -3952,8 +4055,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return 0; /* FWD <--> STRUCT/UNION equivalence check, if enabled */ - if (!d->opts.dont_resolve_fwds - && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) && cand_kind != canon_kind) { __u16 real_kind; __u16 fwd_kind; @@ -3979,10 +4081,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return btf_equal_int_tag(cand_type, canon_type); case BTF_KIND_ENUM: - if (d->opts.dont_resolve_fwds) - return btf_equal_enum(cand_type, canon_type); - else - return btf_compat_enum(cand_type, canon_type); + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: case BTF_KIND_FLOAT: @@ -3994,6 +4093,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: if (cand_type->info != canon_type->info) return 0; return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); @@ -4289,6 +4389,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); if (ref_type_id < 0) return ref_type_id; @@ -4595,6 +4696,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: return visit(&t->type, ctx); case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index bc005ba3ceec..061839f04525 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -157,7 +157,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_value_size, __u32 
*key_type_id, __u32 *value_type_id); -LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); @@ -227,6 +227,7 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); /* func and func_proto construction APIs */ LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -245,25 +246,86 @@ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_typ int component_idx); struct btf_dedup_opts { - unsigned int dedup_table_size; - bool dont_resolve_fwds; + size_t sz; + /* optional .BTF.ext info to dedup along the main BTF info */ + struct btf_ext *btf_ext; + /* force hash collisions (used for testing) */ + bool force_collisions; + size_t :0; }; +#define btf_dedup_opts__last_field force_collisions + +LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); + +LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts); -LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, - const struct btf_dedup_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") +LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); +#define btf__dedup(...) 
___libbpf_overload(___btf_dedup, __VA_ARGS__) +#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) +#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts) struct btf_dump; struct btf_dump_opts { - void *ctx; + union { + size_t sz; + void *ctx; /* DEPRECATED: will be gone in v1.0 */ + }; }; typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args); LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn); + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts); + +LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts); + +LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn); + +/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the + * type of 4th argument. If it's btf_dump's print callback, use deprecated + * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() + * doesn't work here because both variants have 4 input arguments. + * + * (void *) casts are necessary to avoid compilation warnings about type + * mismatches, because even though __builtin_choose_expr() only ever evaluates + * one side the other side still has to satisfy type constraints (this is + * compiler implementation limitation which might be lifted eventually, + * according to the documentation). So passing struct btf_ext in place of + * btf_dump_printf_fn_t would be generating compilation warning. Casting to + * void * avoids this issue. + * + * Also, two type compatibility checks for a function and function pointer are + * required because passing function reference into btf_dump__new() as + * btf_dump__new(..., my_callback, ...) 
and as btf_dump__new(..., + * &my_callback, ...) (not explicit ampersand in the latter case) actually + * differs as far as __builtin_types_compatible_p() is concerned. Thus two + * checks are combined to detect callback argument. + * + * The rest works just like in case of ___libbpf_override() usage with symbol + * versioning. + * + * C++ compilers don't support __builtin_types_compatible_p(), so at least + * don't screw up compilation for them and let C++ users pick btf_dump__new + * vs btf_dump__new_deprecated explicitly. + */ +#ifndef __cplusplus +#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) || \ + __builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)), \ + btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4), \ + btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) +#endif + LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); @@ -403,7 +465,8 @@ static inline bool btf_is_mod(const struct btf_type *t) return kind == BTF_KIND_VOLATILE || kind == BTF_KIND_CONST || - kind == BTF_KIND_RESTRICT; + kind == BTF_KIND_RESTRICT || + kind == BTF_KIND_TYPE_TAG; } static inline bool btf_is_func(const struct btf_type *t) @@ -436,6 +499,11 @@ static inline bool btf_is_decl_tag(const struct btf_type *t) return btf_kind(t) == BTF_KIND_DECL_TAG; } +static inline bool btf_is_type_tag(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_TYPE_TAG; +} + static inline __u8 btf_int_encoding(const struct btf_type *t) { return BTF_INT_ENCODING(*(__u32 *)(t + 1)); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 17db62b5002e..b9a3260c83cb 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -77,9 +77,8 @@ struct btf_dump_data { struct btf_dump { const struct btf *btf; - const struct btf_ext *btf_ext; btf_dump_printf_fn_t printf_fn; - struct 
btf_dump_opts opts; + void *cb_ctx; int ptr_sz; bool strip_mods; bool skip_anon_defs; @@ -138,29 +137,32 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) va_list args; va_start(args, fmt); - d->printf_fn(d->opts.ctx, fmt, args); + d->printf_fn(d->cb_ctx, fmt, args); va_end(args); } static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_resize(struct btf_dump *d); -struct btf_dump *btf_dump__new(const struct btf *btf, - const struct btf_ext *btf_ext, - const struct btf_dump_opts *opts, - btf_dump_printf_fn_t printf_fn) +DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) +struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, + btf_dump_printf_fn_t printf_fn, + void *ctx, + const struct btf_dump_opts *opts) { struct btf_dump *d; int err; + if (!printf_fn) + return libbpf_err_ptr(-EINVAL); + d = calloc(1, sizeof(struct btf_dump)); if (!d) return libbpf_err_ptr(-ENOMEM); d->btf = btf; - d->btf_ext = btf_ext; d->printf_fn = printf_fn; - d->opts.ctx = opts ? opts->ctx : NULL; + d->cb_ctx = ctx; d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *); d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); @@ -186,6 +188,17 @@ err: return libbpf_err_ptr(err); } +COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) +struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, + const struct btf_ext *btf_ext, + const struct btf_dump_opts *opts, + btf_dump_printf_fn_t printf_fn) +{ + if (!printf_fn) + return libbpf_err_ptr(-EINVAL); + return btf_dump__new_v0_6_0(btf, printf_fn, opts ? 
opts->ctx : NULL, opts); +} + static int btf_dump_resize(struct btf_dump *d) { int err, last_id = btf__type_cnt(d->btf) - 1; @@ -317,6 +330,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d) case BTF_KIND_FUNC: case BTF_KIND_VAR: case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: d->type_states[t->type].referenced = 1; break; @@ -560,6 +574,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: return btf_dump_order_type(d, t->type, through_ptr); case BTF_KIND_FUNC_PROTO: { @@ -734,6 +749,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: + case BTF_KIND_TYPE_TAG: btf_dump_emit_type(d, t->type, cont_id); break; case BTF_KIND_ARRAY: @@ -1154,6 +1170,7 @@ skip_mod: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_FUNC_PROTO: + case BTF_KIND_TYPE_TAG: id = t->type; break; case BTF_KIND_ARRAY: @@ -1322,6 +1339,11 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_RESTRICT: btf_dump_printf(d, " restrict"); break; + case BTF_KIND_TYPE_TAG: + btf_dump_emit_mods(d, decls); + name = btf_name_of(d, t->name_off); + btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); + break; case BTF_KIND_ARRAY: { const struct btf_array *a = btf_array(t); const struct btf_type *next_t; @@ -2194,7 +2216,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d, __u8 bits_offset, __u8 bit_sz) { - int size, err; + int size, err = 0; size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); if (size < 0) @@ -2299,8 +2321,8 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, if (!opts->indent_str) d->typed_dump->indent_str[0] = '\t'; else - strncat(d->typed_dump->indent_str, opts->indent_str, - sizeof(d->typed_dump->indent_str) - 1); + libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, + 
sizeof(d->typed_dump->indent_str)); d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 9934851ccde7..8ecef1088ba2 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -371,8 +371,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) { int i; - if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) { - pr_warn("progs/maps mismatch\n"); + if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) { + pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n", + nr_progs, gen->nr_progs, nr_maps, gen->nr_maps); gen->error = -EFAULT; return gen->error; } @@ -445,47 +446,32 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, } void bpf_gen__map_create(struct bpf_gen *gen, - struct bpf_create_map_params *map_attr, int map_idx) + enum bpf_map_type map_type, + const char *map_name, + __u32 key_size, __u32 value_size, __u32 max_entries, + struct bpf_map_create_opts *map_attr, int map_idx) { - int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); + int attr_size = offsetofend(union bpf_attr, map_extra); bool close_inner_map_fd = false; int map_create_attr, idx; union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_attr->map_type; - attr.key_size = map_attr->key_size; - attr.value_size = map_attr->value_size; + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; attr.map_flags = map_attr->map_flags; attr.map_extra = map_attr->map_extra; - memcpy(attr.map_name, map_attr->name, - min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); + if (map_name) + libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.numa_node = map_attr->numa_node; attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = map_attr->max_entries; - switch (attr.map_type) { - case BPF_MAP_TYPE_PERF_EVENT_ARRAY: - case 
BPF_MAP_TYPE_CGROUP_ARRAY: - case BPF_MAP_TYPE_STACK_TRACE: - case BPF_MAP_TYPE_ARRAY_OF_MAPS: - case BPF_MAP_TYPE_HASH_OF_MAPS: - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - case BPF_MAP_TYPE_CPUMAP: - case BPF_MAP_TYPE_XSKMAP: - case BPF_MAP_TYPE_SOCKMAP: - case BPF_MAP_TYPE_SOCKHASH: - case BPF_MAP_TYPE_QUEUE: - case BPF_MAP_TYPE_STACK: - case BPF_MAP_TYPE_RINGBUF: - break; - default: - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - } + attr.max_entries = max_entries; + attr.btf_key_type_id = map_attr->btf_key_type_id; + attr.btf_value_type_id = map_attr->btf_value_type_id; pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); + attr.map_name, map_idx, map_type, attr.btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); if (attr.btf_value_type_id) @@ -512,7 +498,7 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_attr->map_type, attr.value_size, + attr.map_name, map_idx, map_type, value_size, attr.btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ @@ -597,8 +583,9 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak, static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo) { struct ksym_desc *kdesc; + int i; - for (int i = 0; i < gen->nr_ksyms; i++) { + for (i = 0; i < gen->nr_ksyms; i++) { if (!strcmp(gen->ksyms[i].name, relo->name)) { gen->ksyms[i].ref++; return &gen->ksyms[i]; @@ -700,27 +687,29 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo return; } kdesc->off = btf_fd_idx; - /* set a default value for imm */ + /* jump to success case */ + emit(gen, 
BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + /* set value for imm, off as 0 */ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); - /* skip success case store if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip success case for ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10)); /* store btf_id into insn[insn_idx].imm */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); + /* obtain fd in BPF_REG_9 */ + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); + /* jump to fd_array store if fd denotes module BTF */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); + /* set the default value for off */ + emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); + /* skip BTF fd store for vmlinux BTF */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* load fd_array slot pointer */ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); - /* skip store of BTF fd if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3)); /* store BTF fd in slot */ - emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); - emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); - /* set a default value for off */ - emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); - /* skip insn->off store if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); - /* skip if vmlinux BTF */ - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1)); /* store index into insn[insn_idx].off */ emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); log: @@ -819,9 +808,8 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, kdesc->insn + offsetof(struct bpf_insn, imm)); move_blob2blob(gen, insn 
+ sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4, kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); - emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); - /* jump over src_reg adjustment if imm is not 0 */ - emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); + /* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ + emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3)); goto clear_src_reg; } /* remember insn offset, so we can copy BTF ID and FD later */ @@ -829,17 +817,20 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit_bpf_find_by_name_kind(gen, relo); if (!relo->is_weak) emit_check_err(gen); - /* set default values as 0 */ + /* jump to success case */ + emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); + /* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */ emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); - /* skip success case stores if ret < 0 */ - emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); + /* skip success case for ret < 0 */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); /* store btf_id into insn[insn_idx].imm */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); /* store btf_obj_fd into insn[insn_idx + 1].imm */ emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); + /* skip src_reg adjustment */ emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ @@ -851,6 +842,22 @@ clear_src_reg: emit_ksym_relo_log(gen, relo, kdesc->ref); } +void bpf_gen__record_relo_core(struct bpf_gen *gen, + const struct bpf_core_relo *core_relo) +{ + struct 
bpf_core_relo *relos; + + relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos)); + if (!relos) { + gen->error = -ENOMEM; + return; + } + gen->core_relos = relos; + relos += gen->core_relo_cnt; + memcpy(relos, core_relo, sizeof(*relos)); + gen->core_relo_cnt++; +} + static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) { int insn; @@ -883,6 +890,15 @@ static void emit_relos(struct bpf_gen *gen, int insns) emit_relo(gen, gen->relos + i, insns); } +static void cleanup_core_relo(struct bpf_gen *gen) +{ + if (!gen->core_relo_cnt) + return; + free(gen->core_relos); + gen->core_relo_cnt = 0; + gen->core_relos = NULL; +} + static void cleanup_relos(struct bpf_gen *gen, int insns) { int i, insn; @@ -910,30 +926,32 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) gen->relo_cnt = 0; gen->relos = NULL; } + cleanup_core_relo(gen); } void bpf_gen__prog_load(struct bpf_gen *gen, - struct bpf_prog_load_params *load_attr, int prog_idx) + enum bpf_prog_type prog_type, const char *prog_name, + const char *license, struct bpf_insn *insns, size_t insn_cnt, + struct bpf_prog_load_opts *load_attr, int prog_idx) { - int attr_size = offsetofend(union bpf_attr, fd_array); - int prog_load_attr, license, insns, func_info, line_info; + int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; + int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd\n", - load_attr->prog_type, load_attr->insn_cnt); + pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", + prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ - license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); + license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ - insns = add_data(gen, load_attr->insns, - load_attr->insn_cnt * sizeof(struct 
bpf_insn)); + insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); - attr.prog_type = load_attr->prog_type; + attr.prog_type = prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.attach_btf_id = load_attr->attach_btf_id; attr.prog_ifindex = load_attr->prog_ifindex; attr.kern_version = 0; - attr.insn_cnt = (__u32)load_attr->insn_cnt; + attr.insn_cnt = (__u32)insn_cnt; attr.prog_flags = load_attr->prog_flags; attr.func_info_rec_size = load_attr->func_info_rec_size; @@ -946,15 +964,19 @@ void bpf_gen__prog_load(struct bpf_gen *gen, line_info = add_data(gen, load_attr->line_info, attr.line_info_cnt * attr.line_info_rec_size); - memcpy(attr.prog_name, load_attr->name, - min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.core_relo_rec_size = sizeof(struct bpf_core_relo); + attr.core_relo_cnt = gen->core_relo_cnt; + core_relos = add_data(gen, gen->core_relos, + attr.core_relo_cnt * attr.core_relo_rec_size); + + libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); /* populate union bpf_attr with a pointer to license */ - emit_rel_store(gen, attr_field(prog_load_attr, license), license); + emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); /* populate union bpf_attr with a pointer to instructions */ - emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); + emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off); /* populate union bpf_attr with a pointer to func_info */ emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); @@ -962,6 +984,9 @@ void bpf_gen__prog_load(struct bpf_gen *gen, /* populate union bpf_attr with a pointer to line_info */ emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); + /* populate union bpf_attr with a pointer to core_relos */ + emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos); + /* populate union bpf_attr fd_array with a 
pointer to data where map_fds are saved */ emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); @@ -986,15 +1011,17 @@ void bpf_gen__prog_load(struct bpf_gen *gen, emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(union bpf_attr, attach_btf_obj_fd))); } - emit_relos(gen, insns); + emit_relos(gen, insns_off); /* emit PROG_LOAD command */ emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ - cleanup_relos(gen, insns); - if (gen->attach_kind) + cleanup_relos(gen, insns_off); + if (gen->attach_kind) { emit_sys_close_blob(gen, attr_field(prog_load_attr, attach_btf_obj_fd)); + gen->attach_kind = 0; + } emit_check_err(gen); /* remember prog_fd in the stack, if successful */ emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, @@ -1040,6 +1067,33 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit_check_err(gen); } +void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot, + int inner_map_idx) +{ + int attr_size = offsetofend(union bpf_attr, flags); + int map_update_attr, key; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", + outer_map_idx, slot, inner_map_idx); + + key = add_data(gen, &slot, sizeof(slot)); + + map_update_attr = add_data(gen, &attr, attr_size); + move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, + blob_fd_array_off(gen, outer_map_idx)); + emit_rel_store(gen, attr_field(map_update_attr, key), key); + emit_rel_store(gen, attr_field(map_update_attr, value), + blob_fd_array_off(gen, inner_map_idx)); + + /* emit MAP_UPDATE_ELEM command */ + emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); + debug_ret(gen, "populate_outer_map outer %d key %d inner %d", + outer_map_idx, slot, inner_map_idx); + 
emit_check_err(gen); +} + void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) { int attr_size = offsetofend(union bpf_attr, map_fd); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7c74342bb668..7f10dd501a52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -168,39 +168,24 @@ int libbpf_set_strict_mode(enum libbpf_strict_mode mode) return 0; } -enum kern_feature_id { - /* v4.14: kernel support for program & map names. */ - FEAT_PROG_NAME, - /* v5.2: kernel support for global data sections. */ - FEAT_GLOBAL_DATA, - /* BTF support */ - FEAT_BTF, - /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ - FEAT_BTF_FUNC, - /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ - FEAT_BTF_DATASEC, - /* BTF_FUNC_GLOBAL is supported */ - FEAT_BTF_GLOBAL_FUNC, - /* BPF_F_MMAPABLE is supported for arrays */ - FEAT_ARRAY_MMAP, - /* kernel support for expected_attach_type in BPF_PROG_LOAD */ - FEAT_EXP_ATTACH_TYPE, - /* bpf_probe_read_{kernel,user}[_str] helpers */ - FEAT_PROBE_READ_KERN, - /* BPF_PROG_BIND_MAP is supported */ - FEAT_PROG_BIND_MAP, - /* Kernel support for module BTFs */ - FEAT_MODULE_BTF, - /* BTF_KIND_FLOAT support */ - FEAT_BTF_FLOAT, - /* BPF perf link support */ - FEAT_PERF_LINK, - /* BTF_KIND_DECL_TAG support */ - FEAT_BTF_DECL_TAG, - __FEAT_CNT, -}; +__u32 libbpf_major_version(void) +{ + return LIBBPF_MAJOR_VERSION; +} -static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +__u32 libbpf_minor_version(void) +{ + return LIBBPF_MINOR_VERSION; +} + +const char *libbpf_version_string(void) +{ +#define __S(X) #X +#define _S(X) __S(X) + return "v" _S(LIBBPF_MAJOR_VERSION) "." 
_S(LIBBPF_MINOR_VERSION); +#undef _S +#undef __S +} enum reloc_type { RELO_LD64, @@ -209,19 +194,25 @@ enum reloc_type { RELO_EXTERN_VAR, RELO_EXTERN_FUNC, RELO_SUBPROG_ADDR, + RELO_CORE, }; struct reloc_desc { enum reloc_type type; int insn_idx; - int map_idx; - int sym_off; + union { + const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ + struct { + int map_idx; + int sym_off; + }; + }; }; struct bpf_sec_def; typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); -typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); +typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie); typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie); /* stored as sec_def->cookie for all libbpf-supported SEC()s */ @@ -304,7 +295,11 @@ struct bpf_program { struct reloc_desc *reloc_desc; int nr_reloc; - int log_level; + + /* BPF verifier log settings */ + char *log_buf; + size_t log_size; + __u32 log_level; struct { int nr; @@ -400,6 +395,7 @@ struct bpf_map { char *pin_path; bool pinned; bool reused; + bool skipped; __u64 map_extra; }; @@ -546,6 +542,11 @@ struct bpf_object { size_t btf_module_cnt; size_t btf_module_cap; + /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ + char *log_buf; + size_t log_size; + __u32 log_level; + void *priv; bpf_object_clear_priv_t clear_priv; @@ -681,6 +682,9 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->instances.fds = NULL; prog->instances.nr = -1; + /* inherit object's log_level */ + prog->log_level = obj->log_level; + prog->sec_name = strdup(sec_name); if (!prog->sec_name) goto errout; @@ -791,11 +795,36 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } -static __u32 get_kernel_version(void) +__u32 get_kernel_version(void) { + /* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, + * but 
Ubuntu provides /proc/version_signature file, as described at + * https://ubuntu.com/kernel, with an example contents below, which we + * can use to get a proper LINUX_VERSION_CODE. + * + * Ubuntu 5.4.0-12.15-generic 5.4.8 + * + * In the above, 5.4.8 is what kernel is actually expecting, while + * uname() call will return 5.4.0 in info.release. + */ + const char *ubuntu_kver_file = "/proc/version_signature"; __u32 major, minor, patch; struct utsname info; + if (access(ubuntu_kver_file, R_OK) == 0) { + FILE *f; + + f = fopen(ubuntu_kver_file, "r"); + if (f) { + if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { + fclose(f); + return KERNEL_VERSION(major, minor, patch); + } + fclose(f); + } + /* something went wrong, fall back to uname() approach */ + } + uname(&info); if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) return 0; @@ -1161,12 +1190,10 @@ static struct bpf_object *bpf_object__new(const char *path, strcpy(obj->path, path); if (obj_name) { - strncpy(obj->name, obj_name, sizeof(obj->name) - 1); - obj->name[sizeof(obj->name) - 1] = 0; + libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name)); } else { /* Using basename() GNU version which doesn't modify arg. 
*/ - strncpy(obj->name, basename((void *)path), - sizeof(obj->name) - 1); + libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name)); end = strchr(obj->name, '.'); if (end) *end = 0; @@ -1318,7 +1345,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj) static int bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) { - memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); + /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't + * go over allowed ELF data section buffer + */ + libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license))); pr_debug("license of %s is %s\n", obj->path, obj->license); return 0; } @@ -2076,6 +2106,7 @@ static const char *__btf_kind_str(__u16 kind) case BTF_KIND_DATASEC: return "datasec"; case BTF_KIND_FLOAT: return "float"; case BTF_KIND_DECL_TAG: return "decl_tag"; + case BTF_KIND_TYPE_TAG: return "type_tag"; default: return "unknown"; } } @@ -2255,6 +2286,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE; } else if (strcmp(name, "values") == 0) { + bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); + bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; + const char *desc = is_map_in_map ? 
"map-in-map inner" : "prog-array value"; char inner_map_name[128]; int err; @@ -2268,8 +2302,8 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_name, name); return -EINVAL; } - if (!bpf_map_type__is_map_in_map(map_def->map_type)) { - pr_warn("map '%s': should be map-in-map.\n", + if (!is_map_in_map && !is_prog_array) { + pr_warn("map '%s': should be map-in-map or prog-array.\n", map_name); return -ENOTSUP; } @@ -2281,22 +2315,30 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, map_def->value_size = 4; t = btf__type_by_id(btf, m->type); if (!t) { - pr_warn("map '%s': map-in-map inner type [%d] not found.\n", - map_name, m->type); + pr_warn("map '%s': %s type [%d] not found.\n", + map_name, desc, m->type); return -EINVAL; } if (!btf_is_array(t) || btf_array(t)->nelems) { - pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", - map_name); + pr_warn("map '%s': %s spec is not a zero-sized array.\n", + map_name, desc); return -EINVAL; } t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL); if (!btf_is_ptr(t)) { - pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", - map_name, btf_kind_str(t)); + pr_warn("map '%s': %s def is of unexpected kind %s.\n", + map_name, desc, btf_kind_str(t)); return -EINVAL; } t = skip_mods_and_typedefs(btf, t->type, NULL); + if (is_prog_array) { + if (!btf_is_func_proto(t)) { + pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", + map_name, btf_kind_str(t)); + return -EINVAL; + } + continue; + } if (!btf_is_struct(t)) { pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", map_name, btf_kind_str(t)); @@ -2588,8 +2630,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); + bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); - return !has_func || 
!has_datasec || !has_func_global || !has_float || !has_decl_tag; + return !has_func || !has_datasec || !has_func_global || !has_float || + !has_decl_tag || !has_type_tag; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2599,6 +2643,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); + bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); struct btf_type *t; int i, j, vlen; @@ -2657,6 +2702,10 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) */ t->name_off = 0; t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); + } else if (!has_type_tag && btf_is_type_tag(t)) { + /* replace TYPE_TAG with a CONST */ + t->name_off = 0; + t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0); } } } @@ -2752,13 +2801,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); - var = btf_var(t_var); - - if (!btf_is_var(t_var)) { + if (!t_var || !btf_is_var(t_var)) { pr_debug("Non-VAR type seen in section %s\n", name); return -EINVAL; } + var = btf_var(t_var); if (var->linkage == BTF_VAR_STATIC) continue; @@ -2972,7 +3020,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) */ btf__set_fd(kern_btf, 0); } else { - err = btf__load_into_kernel(kern_btf); + /* currently BPF_BTF_LOAD only supports log_level 1 */ + err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, + obj->log_level ? 1 : 0); } if (sanitize) { if (!err) { @@ -3191,11 +3241,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf_Scn *scn; Elf64_Shdr *sh; - /* ELF section indices are 1-based, so allocate +1 element to keep - * indexing simple. 
Also include 0th invalid section into sec_cnt for - * simpler and more traditional iteration logic. + /* ELF section indices are 0-based, but sec #0 is special "invalid" + * section. e_shnum does include sec #0, so e_shnum is the necessary + * size of an array to keep all the sections. */ - obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; + obj->efile.sec_cnt = obj->efile.ehdr->e_shnum; obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs)); if (!obj->efile.secs) return -ENOMEM; @@ -3271,8 +3321,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { + if (sh->sh_type != SHT_PROGBITS) + return -LIBBPF_ERRNO__FORMAT; btf_data = data; } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + if (sh->sh_type != SHT_PROGBITS) + return -LIBBPF_ERRNO__FORMAT; btf_ext_data = data; } else if (sh->sh_type == SHT_SYMTAB) { /* already processed during the first pass above */ @@ -3303,6 +3357,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (sh->sh_type == SHT_REL) { int targ_sec_idx = sh->sh_info; /* points to other section */ + if (sh->sh_entsize != sizeof(Elf64_Rel) || + targ_sec_idx >= obj->efile.sec_cnt) + return -LIBBPF_ERRNO__FORMAT; + /* Only do relo for section with exec instructions */ if (!section_have_execinstr(obj, targ_sec_idx) && strcmp(name, ".rel" STRUCT_OPS_SEC) && @@ -3333,7 +3391,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) /* sort BPF programs by section name and in-section instruction offset * for faster search */ - qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); + if (obj->nr_programs) + qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); return bpf_object__init_btf(obj, btf_data, btf_ext_data); } @@ -3555,7 +3614,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) scn = elf_sec_by_idx(obj, 
obj->efile.symbols_shndx); sh = elf_sec_hdr(obj, scn); - if (!sh) + if (!sh || sh->sh_entsize != sizeof(Elf64_Sym)) return -LIBBPF_ERRNO__FORMAT; dummy_var_btf_id = add_dummy_ksym_var(obj->btf); @@ -4022,7 +4081,7 @@ static int bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data) { const char *relo_sec_name, *sec_name; - size_t sec_idx = shdr->sh_info; + size_t sec_idx = shdr->sh_info, sym_idx; struct bpf_program *prog; struct reloc_desc *relos; int err, i, nrels; @@ -4033,6 +4092,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat Elf64_Sym *sym; Elf64_Rel *rel; + if (sec_idx >= obj->efile.sec_cnt) + return -EINVAL; + scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); @@ -4052,16 +4114,23 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat return -LIBBPF_ERRNO__FORMAT; } - sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); + sym_idx = ELF64_R_SYM(rel->r_info); + sym = elf_sym_by_idx(obj, sym_idx); if (!sym) { - pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", - relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); + pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", + relo_sec_name, sym_idx, i); + return -LIBBPF_ERRNO__FORMAT; + } + + if (sym->st_shndx >= obj->efile.sec_cnt) { + pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", + relo_sec_name, sym_idx, (size_t)sym->st_shndx, i); return -LIBBPF_ERRNO__FORMAT; } if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) { pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", - relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); + relo_sec_name, (size_t)rel->r_offset, i); return -LIBBPF_ERRNO__FORMAT; } @@ -4265,30 +4334,24 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries) static int bpf_object__probe_loading(struct bpf_object *obj) { - struct bpf_load_program_attr attr; char *cp, errmsg[STRERR_BUFSIZE]; struct 
bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret; + int ret, insn_cnt = ARRAY_SIZE(insns); if (obj->gen_loader) return 0; - /* make sure basic loading works */ - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; + ret = bump_rlimit_memlock(); + if (ret) + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); - ret = bpf_load_program_xattr(&attr, NULL, 0); - if (ret < 0) { - attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; - ret = bpf_load_program_xattr(&attr, NULL, 0); - } + /* make sure basic loading works */ + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + if (ret < 0) + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4312,29 +4375,19 @@ static int probe_fd(int fd) static int probe_kern_prog_name(void) { - struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret; + int ret, insn_cnt = ARRAY_SIZE(insns); /* make sure loading with name works */ - - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - attr.name = "test"; - ret = bpf_load_program_xattr(&attr, NULL, 0); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); return probe_fd(ret); } static int probe_kern_global_data(void) { - struct bpf_load_program_attr prg_attr; - struct bpf_create_map_attr map_attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), @@ -4342,15 +4395,9 @@ static int probe_kern_global_data(void) BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, map; - - memset(&map_attr, 0, 
sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_ARRAY; - map_attr.key_size = sizeof(int); - map_attr.value_size = 32; - map_attr.max_entries = 1; + int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_create_map_xattr(&map_attr); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4361,13 +4408,7 @@ static int probe_kern_global_data(void) insns[0].imm = map; - memset(&prg_attr, 0, sizeof(prg_attr)); - prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - prg_attr.insns = insns; - prg_attr.insns_cnt = ARRAY_SIZE(insns); - prg_attr.license = "GPL"; - - ret = bpf_load_program_xattr(&prg_attr, NULL, 0); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); close(map); return probe_fd(ret); } @@ -4468,45 +4509,51 @@ static int probe_kern_btf_decl_tag(void) strs, sizeof(strs))); } -static int probe_kern_array_mmap(void) +static int probe_kern_btf_type_tag(void) { - struct bpf_create_map_attr attr = { - .map_type = BPF_MAP_TYPE_ARRAY, - .map_flags = BPF_F_MMAPABLE, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 1, + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ }; - return probe_fd(bpf_create_map_xattr(&attr)); + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); } static int probe_kern_exp_attach_type(void) { - struct bpf_load_program_attr attr; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = 
BPF_CGROUP_INET_SOCK_CREATE); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; + int fd, insn_cnt = ARRAY_SIZE(insns); - memset(&attr, 0, sizeof(attr)); /* use any valid combination of program type and (optional) * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) * to see if kernel supports expected_attach_type field for * BPF_PROG_LOAD command */ - attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; - attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - - return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); } static int probe_kern_probe_read_kernel(void) { - struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ @@ -4515,34 +4562,22 @@ static int probe_kern_probe_read_kernel(void) BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), BPF_EXIT_INSN(), }; + int fd, insn_cnt = ARRAY_SIZE(insns); - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_KPROBE; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - - return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); + fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); + return probe_fd(fd); } static int probe_prog_bind_map(void) { - struct bpf_load_program_attr prg_attr; - struct bpf_create_map_attr map_attr; char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, map, prog; - - memset(&map_attr, 0, sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_ARRAY; - map_attr.key_size = sizeof(int); - map_attr.value_size = 32; - map_attr.max_entries = 1; + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = 
bpf_create_map_xattr(&map_attr); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4551,13 +4586,7 @@ static int probe_prog_bind_map(void) return ret; } - memset(&prg_attr, 0, sizeof(prg_attr)); - prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - prg_attr.insns = insns; - prg_attr.insns_cnt = ARRAY_SIZE(insns); - prg_attr.license = "GPL"; - - prog = bpf_load_program_xattr(&prg_attr, NULL, 0); + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); if (prog < 0) { close(map); return 0; @@ -4602,19 +4631,14 @@ static int probe_module_btf(void) static int probe_perf_link(void) { - struct bpf_load_program_attr attr; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; int prog_fd, link_fd, err; - memset(&attr, 0, sizeof(attr)); - attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; - attr.insns = insns; - attr.insns_cnt = ARRAY_SIZE(insns); - attr.license = "GPL"; - prog_fd = bpf_load_program_xattr(&attr, NULL, 0); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), NULL); if (prog_fd < 0) return -errno; @@ -4687,14 +4711,20 @@ static struct kern_feature_desc { [FEAT_BTF_DECL_TAG] = { "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, }; -static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; int ret; - if (obj->gen_loader) + if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. 
*/ @@ -4821,19 +4851,16 @@ static void bpf_map__destroy(struct bpf_map *map); static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { - struct bpf_create_map_params create_attr; + LIBBPF_OPTS(bpf_map_create_opts, create_attr); struct bpf_map_def *def = &map->def; + const char *map_name = NULL; + __u32 max_entries; int err = 0; - memset(&create_attr, 0, sizeof(create_attr)); - if (kernel_supports(obj, FEAT_PROG_NAME)) - create_attr.name = map->name; + map_name = map->name; create_attr.map_ifindex = map->map_ifindex; - create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; - create_attr.key_size = def->key_size; - create_attr.value_size = def->value_size; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; @@ -4847,18 +4874,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return nr_cpus; } pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); - create_attr.max_entries = nr_cpus; + max_entries = nr_cpus; } else { - create_attr.max_entries = def->max_entries; + max_entries = def->max_entries; } if (bpf_map__is_struct_ops(map)) - create_attr.btf_vmlinux_value_type_id = - map->btf_vmlinux_value_type_id; + create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; - create_attr.btf_fd = 0; - create_attr.btf_key_type_id = 0; - create_attr.btf_value_type_id = 0; if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) { create_attr.btf_fd = btf__fd(obj->btf); create_attr.btf_key_type_id = map->btf_key_type_id; @@ -4904,13 +4927,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } if (obj->gen_loader) { - bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); + bpf_gen__map_create(obj->gen_loader, def->type, map_name, + def->key_size, def->value_size, max_entries, + &create_attr, is_inner ? 
-1 : map - obj->maps); /* Pretend to have valid FD to pass various fd >= 0 checks. * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. */ map->fd = 0; } else { - map->fd = libbpf__bpf_create_map_xattr(&create_attr); + map->fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + max_entries, &create_attr); } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { @@ -4925,7 +4952,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.btf_value_type_id = 0; map->btf_key_type_id = 0; map->btf_value_type_id = 0; - map->fd = libbpf__bpf_create_map_xattr(&create_attr); + map->fd = bpf_map_create(def->type, map_name, + def->key_size, def->value_size, + max_entries, &create_attr); } err = map->fd < 0 ? -errno : 0; @@ -4940,7 +4969,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return err; } -static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) +static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) { const struct bpf_map *targ_map; unsigned int i; @@ -4952,18 +4981,18 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) targ_map = map->init_slots[i]; fd = bpf_map__fd(targ_map); + if (obj->gen_loader) { - pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", - map - obj->maps, i, targ_map - obj->maps); - return -ENOTSUP; + bpf_gen__populate_outer_map(obj->gen_loader, + map - obj->maps, i, + targ_map - obj->maps); } else { err = bpf_map_update_elem(map->fd, &i, &fd, 0); } if (err) { err = -errno; pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, - fd, err); + map->name, i, targ_map->name, fd, err); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -4976,6 +5005,59 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) return 0; } 
+static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) +{ + const struct bpf_program *targ_prog; + unsigned int i; + int fd, err; + + if (obj->gen_loader) + return -ENOTSUP; + + for (i = 0; i < map->init_slots_sz; i++) { + if (!map->init_slots[i]) + continue; + + targ_prog = map->init_slots[i]; + fd = bpf_program__fd(targ_prog); + + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + if (err) { + err = -errno; + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", + map->name, i, targ_prog->name, fd, err); + return err; + } + pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", + map->name, i, targ_prog->name, fd); + } + + zfree(&map->init_slots); + map->init_slots_sz = 0; + + return 0; +} + +static int bpf_object_init_prog_arrays(struct bpf_object *obj) +{ + struct bpf_map *map; + int i, err; + + for (i = 0; i < obj->nr_maps; i++) { + map = &obj->maps[i]; + + if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) + continue; + + err = init_prog_array_slots(obj, map); + if (err < 0) { + zclose(map->fd); + return err; + } + } + return 0; +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -4988,6 +5070,26 @@ bpf_object__create_maps(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { map = &obj->maps[i]; + /* To support old kernels, we skip creating global data maps + * (.rodata, .data, .kconfig, etc); later on, during program + * loading, if we detect that at least one of the to-be-loaded + * programs is referencing any global data map, we'll error + * out with program name and relocation index logged. + * This approach allows to accommodate Clang emitting + * unnecessary .rodata.str1.1 sections for string literals, + * but also it allows to have CO-RE applications that use + * global variables in some of BPF programs, but not others. 
+ * If those global variable-using programs are not loaded at + * runtime due to bpf_program__set_autoload(prog, false), + * bpf_object loading will succeed just fine even on old + * kernels. + */ + if (bpf_map__is_internal(map) && + !kernel_supports(obj, FEAT_GLOBAL_DATA)) { + map->skipped = true; + continue; + } + retried = false; retry: if (map->pin_path) { @@ -5024,8 +5126,8 @@ retry: } } - if (map->init_slots_sz) { - err = init_map_slots(obj, map); + if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { + err = init_map_in_map_slots(obj, map); if (err < 0) { zclose(map->fd); goto err_out; @@ -5097,15 +5199,18 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; - const struct btf_type *t; - const char *targ_name; + const struct btf_type *t, *local_t; + const char *targ_name, *local_name; size_t targ_essent_len; int n, i; + local_t = btf__type_by_id(local_cand->btf, local_cand->id); + local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); + n = btf__type_cnt(targ_btf); for (i = targ_start_id; i < n; i++) { t = btf__type_by_id(targ_btf, i); - if (btf_kind(t) != btf_kind(local_cand->t)) + if (btf_kind(t) != btf_kind(local_t)) continue; targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5116,12 +5221,12 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, if (targ_essent_len != local_essent_len) continue; - if (strncmp(local_cand->name, targ_name, local_essent_len) != 0) + if (strncmp(local_name, targ_name, local_essent_len) != 0) continue; pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", - local_cand->id, btf_kind_str(local_cand->t), - local_cand->name, i, btf_kind_str(t), targ_name, + local_cand->id, btf_kind_str(local_t), + local_name, i, btf_kind_str(t), targ_name, targ_btf_name); new_cands = libbpf_reallocarray(cands->cands, cands->len + 1, sizeof(*cands->cands)); @@ -5130,8 +5235,6 
@@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand, cand = &new_cands[cands->len]; cand->btf = targ_btf; - cand->t = t; - cand->name = targ_name; cand->id = i; cands->cands = new_cands; @@ -5238,18 +5341,21 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l struct bpf_core_cand local_cand = {}; struct bpf_core_cand_list *cands; const struct btf *main_btf; + const struct btf_type *local_t; + const char *local_name; size_t local_essent_len; int err, i; local_cand.btf = local_btf; - local_cand.t = btf__type_by_id(local_btf, local_type_id); - if (!local_cand.t) + local_cand.id = local_type_id; + local_t = btf__type_by_id(local_btf, local_type_id); + if (!local_t) return ERR_PTR(-EINVAL); - local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off); - if (str_is_empty(local_cand.name)) + local_name = btf__name_by_offset(local_btf, local_t->name_off); + if (str_is_empty(local_name)) return ERR_PTR(-EINVAL); - local_essent_len = bpf_core_essential_name_len(local_cand.name); + local_essent_len = bpf_core_essential_name_len(local_name); cands = calloc(1, sizeof(*cands)); if (!cands) @@ -5399,12 +5505,31 @@ static void *u32_as_hash_key(__u32 x) return (void *)(uintptr_t)x; } +static int record_relo_core(struct bpf_program *prog, + const struct bpf_core_relo *core_relo, int insn_idx) +{ + struct reloc_desc *relos, *relo; + + relos = libbpf_reallocarray(prog->reloc_desc, + prog->nr_reloc + 1, sizeof(*relos)); + if (!relos) + return -ENOMEM; + relo = &relos[prog->nr_reloc]; + relo->type = RELO_CORE; + relo->insn_idx = insn_idx; + relo->core_relo = core_relo; + prog->reloc_desc = relos; + prog->nr_reloc++; + return 0; +} + static int bpf_core_apply_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, struct hashmap *cand_cache) { + struct bpf_core_spec specs_scratch[3] = {}; const void *type_key = u32_as_hash_key(relo->type_id); struct bpf_core_cand_list *cands = 
NULL; const char *prog_name = prog->name; @@ -5435,13 +5560,15 @@ static int bpf_core_apply_relo(struct bpf_program *prog, return -EINVAL; if (prog->obj->gen_loader) { - pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", + const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + + pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", prog - prog->obj->programs, relo->insn_off / 8, - local_name, relo->kind); - return -ENOTSUP; + btf_kind_str(local_type), local_name, spec_str, insn_idx); + return record_relo_core(prog, relo, insn_idx); } - if (relo->kind != BPF_TYPE_ID_LOCAL && + if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { @@ -5457,7 +5584,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); + return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, + relo_idx, local_btf, cands, specs_scratch); } static int @@ -5587,6 +5715,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; } else { + const struct bpf_map *map = &obj->maps[relo->map_idx]; + + if (map->skipped) { + pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", + prog->name, i); + return -ENOTSUP; + } insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[0].imm = obj->maps[relo->map_idx].fd; } @@ -5635,6 +5770,9 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CALL: /* handled already */ break; + case RELO_CORE: + /* will be handled by bpf_program_record_relos() */ + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -5798,6 +5936,8 @@ static int cmp_relo_by_insn_idx(const void *key, const void *elem) static 
struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx) { + if (!prog->nr_reloc) + return NULL; return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc, sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); } @@ -5813,8 +5953,9 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); if (!relos) return -ENOMEM; - memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, - sizeof(*relos) * subprog->nr_reloc); + if (subprog->nr_reloc) + memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, + sizeof(*relos) * subprog->nr_reloc); for (i = main_prog->nr_reloc; i < new_cnt; i++) relos[i].insn_idx += subprog->sub_insn_off; @@ -6072,6 +6213,35 @@ bpf_object__free_relocs(struct bpf_object *obj) } } +static int cmp_relocs(const void *_a, const void *_b) +{ + const struct reloc_desc *a = _a; + const struct reloc_desc *b = _b; + + if (a->insn_idx != b->insn_idx) + return a->insn_idx < b->insn_idx ? -1 : 1; + + /* no two relocations should have the same insn_idx, but ... */ + if (a->type != b->type) + return a->type < b->type ? 
-1 : 1; + + return 0; +} + +static void bpf_object__sort_relos(struct bpf_object *obj) +{ + int i; + + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *p = &obj->programs[i]; + + if (!p->nr_reloc) + continue; + + qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); + } +} + static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { @@ -6086,6 +6256,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err); return err; } + if (obj->gen_loader) + bpf_object__sort_relos(obj); } /* Before relocating calls pre-process relocations and mark @@ -6121,6 +6293,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) */ if (prog_is_subprog(obj, prog)) continue; + if (!prog->load) + continue; err = bpf_object__relocate_calls(obj, prog); if (err) { @@ -6134,6 +6308,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; + if (!prog->load) + continue; err = bpf_object__relocate_data(obj, prog); if (err) { pr_warn("prog '%s': failed to relocate data references: %d\n", @@ -6156,9 +6332,11 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, int i, j, nrels, new_sz; const struct btf_var_secinfo *vi = NULL; const struct btf_type *sec, *var, *def; - struct bpf_map *map = NULL, *targ_map; + struct bpf_map *map = NULL, *targ_map = NULL; + struct bpf_program *targ_prog = NULL; + bool is_prog_array, is_map_in_map; const struct btf_member *member; - const char *name, *mname; + const char *name, *mname, *type; unsigned int moff; Elf64_Sym *sym; Elf64_Rel *rel; @@ -6185,11 +6363,6 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -LIBBPF_ERRNO__FORMAT; } name = elf_sym_str(obj, sym->st_name) ?: "<?>"; - if (sym->st_shndx != obj->efile.btf_maps_shndx) { - pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", - i, name); - return -LIBBPF_ERRNO__RELOC; - } 
pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n", i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, @@ -6211,19 +6384,45 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj, return -EINVAL; } - if (!bpf_map_type__is_map_in_map(map->def.type)) - return -EINVAL; - if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && - map->def.key_size != sizeof(int)) { - pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", - i, map->name, sizeof(int)); + is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); + is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; + type = is_map_in_map ? "map" : "prog"; + if (is_map_in_map) { + if (sym->st_shndx != obj->efile.btf_maps_shndx) { + pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", + i, name); + return -LIBBPF_ERRNO__RELOC; + } + if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && + map->def.key_size != sizeof(int)) { + pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", + i, map->name, sizeof(int)); + return -EINVAL; + } + targ_map = bpf_object__find_map_by_name(obj, name); + if (!targ_map) { + pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", + i, name); + return -ESRCH; + } + } else if (is_prog_array) { + targ_prog = bpf_object__find_program_by_name(obj, name); + if (!targ_prog) { + pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", + i, name); + return -ESRCH; + } + if (targ_prog->sec_idx != sym->st_shndx || + targ_prog->sec_insn_off * 8 != sym->st_value || + prog_is_subprog(obj, targ_prog)) { + pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", + i, name); + return -LIBBPF_ERRNO__RELOC; + } + } else { return -EINVAL; } - targ_map = bpf_object__find_map_by_name(obj, name); - if (!targ_map) - return -ESRCH; - var = btf__type_by_id(obj->btf, vi->type); def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (btf_vlen(def) == 0) @@ -6254,30 +6453,15 @@ static int 
bpf_object__collect_map_relos(struct bpf_object *obj, (new_sz - map->init_slots_sz) * host_ptr_sz); map->init_slots_sz = new_sz; } - map->init_slots[moff] = targ_map; + map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog; - pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n", - i, map->name, moff, name); + pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", + i, map->name, moff, type, name); } return 0; } -static int cmp_relocs(const void *_a, const void *_b) -{ - const struct reloc_desc *a = _a; - const struct reloc_desc *b = _b; - - if (a->insn_idx != b->insn_idx) - return a->insn_idx < b->insn_idx ? -1 : 1; - - /* no two relocations should have the same insn_idx, but ... */ - if (a->type != b->type) - return a->type < b->type ? -1 : 1; - - return 0; -} - static int bpf_object__collect_relos(struct bpf_object *obj) { int i, err; @@ -6310,14 +6494,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj) return err; } - for (i = 0; i < obj->nr_programs; i++) { - struct bpf_program *p = &obj->programs[i]; - - if (!p->nr_reloc) - continue; - - qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); - } + bpf_object__sort_relos(obj); return 0; } @@ -6374,16 +6551,16 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac /* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */ static int libbpf_preload_prog(struct bpf_program *prog, - struct bpf_prog_load_params *attr, long cookie) + struct bpf_prog_load_opts *opts, long cookie) { enum sec_def_flags def = cookie; /* old kernels might not support specifying expected_attach_type */ if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) - attr->expected_attach_type = 0; + opts->expected_attach_type = 0; if (def & SEC_SLEEPABLE) - attr->prog_flags |= BPF_F_SLEEPABLE; + opts->prog_flags |= BPF_F_SLEEPABLE; if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == 
BPF_PROG_TYPE_LSM || @@ -6402,25 +6579,28 @@ static int libbpf_preload_prog(struct bpf_program *prog, /* but by now libbpf common logic is not utilizing * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because - * this callback is called after attrs were populated by - * libbpf, so this callback has to update attr explicitly here + * this callback is called after opts were populated by + * libbpf, so this callback has to update opts explicitly here */ - attr->attach_btf_obj_fd = btf_obj_fd; - attr->attach_btf_id = btf_type_id; + opts->attach_btf_obj_fd = btf_obj_fd; + opts->attach_btf_id = btf_type_id; } return 0; } -static int -load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd) +static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, + struct bpf_insn *insns, int insns_cnt, + const char *license, __u32 kern_version, + int *prog_fd) { - struct bpf_prog_load_params load_attr = {}; - struct bpf_object *obj = prog->obj; + LIBBPF_OPTS(bpf_prog_load_opts, load_attr); + const char *prog_name = NULL; char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; - char *log_buf = NULL; + char *log_buf = NULL, *tmp; int btf_fd, ret, err; + bool own_log_buf = true; + __u32 log_level = prog->log_level; if (prog->type == BPF_PROG_TYPE_UNSPEC) { /* @@ -6435,14 +6615,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, if (!insns || !insns_cnt) return -EINVAL; - load_attr.prog_type = prog->type; load_attr.expected_attach_type = prog->expected_attach_type; if (kernel_supports(obj, FEAT_PROG_NAME)) - load_attr.name = prog->name; - load_attr.insns = insns; - load_attr.insn_cnt = insns_cnt; - load_attr.license = license; - load_attr.attach_btf_id = prog->attach_btf_id; + prog_name = prog->name; load_attr.attach_prog_fd = prog->attach_prog_fd; load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; load_attr.attach_btf_id = prog->attach_btf_id; @@ 
-6460,7 +6635,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.line_info_rec_size = prog->line_info_rec_size; load_attr.line_info_cnt = prog->line_info_cnt; } - load_attr.log_level = prog->log_level; + load_attr.log_level = log_level; load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; @@ -6475,27 +6650,51 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, } if (obj->gen_loader) { - bpf_gen__prog_load(obj->gen_loader, &load_attr, + bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, + license, insns, insns_cnt, &load_attr, prog - obj->programs); - *pfd = -1; + *prog_fd = -1; return 0; } -retry_load: - if (log_buf_size) { - log_buf = malloc(log_buf_size); - if (!log_buf) - return -ENOMEM; - *log_buf = 0; +retry_load: + /* if log_level is zero, we don't request logs initiallly even if + * custom log_buf is specified; if the program load fails, then we'll + * bump log_level to 1 and use either custom log_buf or we'll allocate + * our own and retry the load to get details on what failed + */ + if (log_level) { + if (prog->log_buf) { + log_buf = prog->log_buf; + log_buf_size = prog->log_size; + own_log_buf = false; + } else if (obj->log_buf) { + log_buf = obj->log_buf; + log_buf_size = obj->log_size; + own_log_buf = false; + } else { + log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); + tmp = realloc(log_buf, log_buf_size); + if (!tmp) { + ret = -ENOMEM; + goto out; + } + log_buf = tmp; + log_buf[0] = '\0'; + own_log_buf = true; + } } load_attr.log_buf = log_buf; - load_attr.log_buf_sz = log_buf_size; - ret = libbpf__bpf_prog_load(&load_attr); + load_attr.log_size = log_buf_size; + load_attr.log_level = log_level; + ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr); if (ret >= 0) { - if (log_buf && load_attr.log_level) - pr_debug("verifier log:\n%s", log_buf); + if (log_level && own_log_buf) { + pr_debug("prog '%s': -- 
BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) { struct bpf_map *map; @@ -6508,61 +6707,53 @@ retry_load: if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to bind .rodata map: %s\n", - prog->name, cp); + pr_warn("prog '%s': failed to bind map '%s': %s\n", + prog->name, map->real_name, cp); /* Don't fail hard if can't bind rodata. */ } } } - *pfd = ret; + *prog_fd = ret; ret = 0; goto out; } - if (!log_buf || errno == ENOSPC) { - log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, - log_buf_size << 1); - - free(log_buf); + if (log_level == 0) { + log_level = 1; goto retry_load; } - ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; + /* On ENOSPC, increase log buffer size and retry, unless custom + * log_buf is specified. + * Be careful to not overflow u32, though. Kernel's log buf size limit + * isn't part of UAPI so it can always be bumped to full 4GB. So don't + * multiply by 2 unless we are sure we'll fit within 32 bits. + * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). + */ + if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) + goto retry_load; + + ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("load bpf program failed: %s\n", cp); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); pr_perm_msg(ret); - if (log_buf && log_buf[0] != '\0') { - ret = -LIBBPF_ERRNO__VERIFY; - pr_warn("-- BEGIN DUMP LOG ---\n"); - pr_warn("\n%s\n", log_buf); - pr_warn("-- END LOG --\n"); - } else if (load_attr.insn_cnt >= BPF_MAXINSNS) { - pr_warn("Program too large (%zu insns), at most %d insns\n", - load_attr.insn_cnt, BPF_MAXINSNS); - ret = -LIBBPF_ERRNO__PROG2BIG; - } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { - /* Wrong program type? 
*/ - int fd; - - load_attr.prog_type = BPF_PROG_TYPE_KPROBE; - load_attr.expected_attach_type = 0; - load_attr.log_buf = NULL; - load_attr.log_buf_sz = 0; - fd = libbpf__bpf_prog_load(&load_attr); - if (fd >= 0) { - close(fd); - ret = -LIBBPF_ERRNO__PROGTYPE; - goto out; - } + if (own_log_buf && log_buf && log_buf[0] != '\0') { + pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", + prog->name, log_buf); + } + if (insns_cnt >= BPF_MAXINSNS) { + pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", + prog->name, insns_cnt, BPF_MAXINSNS); } out: - free(log_buf); + if (own_log_buf) + free(log_buf); return ret; } -static int bpf_program__record_externs(struct bpf_program *prog) +static int bpf_program_record_relos(struct bpf_program *prog) { struct bpf_object *obj = prog->obj; int i; @@ -6584,6 +6775,17 @@ static int bpf_program__record_externs(struct bpf_program *prog) ext->is_weak, false, BTF_KIND_FUNC, relo->insn_idx); break; + case RELO_CORE: { + struct bpf_core_relo cr = { + .insn_off = relo->insn_idx * 8, + .type_id = relo->core_relo->type_id, + .access_str_off = relo->core_relo->access_str_off, + .kind = relo->core_relo->kind, + }; + + bpf_gen__record_relo_core(obj->gen_loader, &cr); + break; + } default: continue; } @@ -6591,11 +6793,12 @@ static int bpf_program__record_externs(struct bpf_program *prog) return 0; } -int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) +static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, + const char *license, __u32 kern_ver) { int err = 0, fd, i; - if (prog->obj->loaded) { + if (obj->loaded) { pr_warn("prog '%s': can't load after object was loaded\n", prog->name); return libbpf_err(-EINVAL); } @@ -6621,10 +6824,11 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) pr_warn("prog '%s': inconsistent nr(%d) != 1\n", prog->name, prog->instances.nr); } - if (prog->obj->gen_loader) - 
bpf_program__record_externs(prog); - err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_ver, &fd); + if (obj->gen_loader) + bpf_program_record_relos(prog); + err = bpf_object_load_prog_instance(obj, prog, + prog->insns, prog->insns_cnt, + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -6652,8 +6856,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) continue; } - err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, license, kern_ver, &fd); + err = bpf_object_load_prog_instance(obj, prog, + result.new_insn_ptr, result.new_insn_cnt, + license, kern_ver, &fd); if (err) { pr_warn("Loading the %dth instance of program '%s' failed\n", i, prog->name); @@ -6670,6 +6875,11 @@ out: return libbpf_err(err); } +int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) +{ + return bpf_object_load_prog(prog->obj, prog, license, kern_ver); +} + static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { @@ -6693,7 +6903,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) continue; } prog->log_level |= log_level; - err = bpf_program__load(prog, obj->license, obj->kern_version); + err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version); if (err) return err; } @@ -6744,14 +6954,16 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object return 0; } -static struct bpf_object * -__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - const struct bpf_object_open_opts *opts) +static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, + const struct bpf_object_open_opts *opts) { const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_object *obj; char tmp_name[64]; int err; + char *log_buf; + size_t log_size; + __u32 log_level; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn("failed to init libelf for %s\n", @@ -6774,10 
+6986,22 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, pr_debug("loading object '%s' from buffer\n", obj_name); } + log_buf = OPTS_GET(opts, kernel_log_buf, NULL); + log_size = OPTS_GET(opts, kernel_log_size, 0); + log_level = OPTS_GET(opts, kernel_log_level, 0); + if (log_size > UINT_MAX) + return ERR_PTR(-EINVAL); + if (log_size && !log_buf) + return ERR_PTR(-EINVAL); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; + obj->log_buf = log_buf; + obj->log_size = log_size; + obj->log_level = log_level; + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -6831,7 +7055,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) return NULL; pr_debug("loading %s\n", attr->file); - return __bpf_object__open(attr->file, NULL, 0, &opts); + return bpf_object_open(attr->file, NULL, 0, &opts); } struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -6857,7 +7081,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) pr_debug("loading %s\n", path); - return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts)); + return libbpf_ptr(bpf_object_open(path, NULL, 0, opts)); } struct bpf_object * @@ -6867,7 +7091,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, if (!obj_buf || obj_buf_sz == 0) return libbpf_err_ptr(-EINVAL); - return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts)); + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts)); } struct bpf_object * @@ -6884,7 +7108,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, if (!obj_buf || obj_buf_sz == 0) return errno = EINVAL, NULL; - return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); + return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts)); } static int bpf_object_unload(struct bpf_object *obj) @@ -6915,10 +7139,6 @@ 
static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; - if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { - pr_warn("kernel doesn't support global data\n"); - return -ENOTSUP; - } if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -7241,14 +7461,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj, return 0; } -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) +static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) { - struct bpf_object *obj; int err, i; - if (!attr) - return libbpf_err(-EINVAL); - obj = attr->obj; if (!obj) return libbpf_err(-EINVAL); @@ -7258,7 +7474,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) } if (obj->gen_loader) - bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps); + bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); @@ -7267,8 +7483,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); - err = err ? : bpf_object__load_progs(obj, attr->log_level); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__load_progs(obj, extra_log_level); + err = err ? 
: bpf_object_init_prog_arrays(obj); if (obj->gen_loader) { /* reset FDs */ @@ -7312,13 +7529,14 @@ out: return libbpf_err(err); } -int bpf_object__load(struct bpf_object *obj) +int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { - struct bpf_object_load_attr attr = { - .obj = obj, - }; + return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); +} - return bpf_object__load_xattr(&attr); +int bpf_object__load(struct bpf_object *obj) +{ + return bpf_object_load(obj, 0, NULL); } static int make_parent_dir(const char *path) @@ -7707,6 +7925,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) char *pin_path = NULL; char buf[PATH_MAX]; + if (map->skipped) + continue; + if (path) { int len; @@ -7733,7 +7954,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return 0; err_unpin_maps: - while ((map = bpf_map__prev(map, obj))) { + while ((map = bpf_object__prev_map(obj, map))) { if (!map->pin_path) continue; @@ -7813,7 +8034,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) return 0; err_unpin_programs: - while ((prog = bpf_program__prev(prog, obj))) { + while ((prog = bpf_object__prev_program(obj, prog))) { char buf[PATH_MAX]; int len; @@ -8154,9 +8375,11 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) return 0; } +static int bpf_program_nth_fd(const struct bpf_program *prog, int n); + int bpf_program__fd(const struct bpf_program *prog) { - return bpf_program__nth_fd(prog, 0); + return bpf_program_nth_fd(prog, 0); } size_t bpf_program__size(const struct bpf_program *prog) @@ -8202,7 +8425,10 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, return 0; } -int bpf_program__nth_fd(const struct bpf_program *prog, int n) +__attribute__((alias("bpf_program_nth_fd"))) +int bpf_program__nth_fd(const struct bpf_program *prog, int n); + +static int bpf_program_nth_fd(const struct bpf_program *prog, int n) { int fd; @@ -8281,6 +8507,54 @@ void 
bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } +__u32 bpf_program__flags(const struct bpf_program *prog) +{ + return prog->prog_flags; +} + +int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) +{ + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + + prog->prog_flags = flags; + return 0; +} + +__u32 bpf_program__log_level(const struct bpf_program *prog) +{ + return prog->log_level; +} + +int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) +{ + if (prog->obj->loaded) + return libbpf_err(-EBUSY); + + prog->log_level = log_level; + return 0; +} + +const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) +{ + *log_size = prog->log_size; + return prog->log_buf; +} + +int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) +{ + if (log_size && !log_buf) + return -EINVAL; + if (prog->log_size > UINT_MAX) + return -EINVAL; + if (prog->obj->loaded) + return -EBUSY; + + prog->log_buf = log_buf; + prog->log_size = log_size; + return 0; +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) 
{ \ .sec = sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -9028,7 +9302,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) pr_warn("error: inner_map_fd already specified\n"); return libbpf_err(-EINVAL); } - zfree(&map->inner_map); + if (map->inner_map) { + bpf_map__destroy(map->inner_map); + zfree(&map->inner_map); + } map->inner_map_fd = fd; return 0; } @@ -9145,21 +9422,12 @@ long libbpf_get_error(const void *ptr) return -errno; } -int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_prog_load_attr attr; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; - - return bpf_prog_load_xattr(&attr, pobj, prog_fd); -} - +__attribute__((alias("bpf_prog_load_xattr2"))) int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, - struct bpf_object **pobj, int *prog_fd) + struct bpf_object **pobj, int *prog_fd); + +static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, + struct bpf_object **pobj, int *prog_fd) { struct bpf_object_open_attr open_attr = {}; struct bpf_program *prog, *first_prog = NULL; @@ -9230,6 +9498,20 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return 0; } +COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) +int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_prog_load_attr attr; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = type; + attr.expected_attach_type = 0; + + return bpf_prog_load_xattr2(&attr, pobj, prog_fd); +} + struct bpf_link { int (*detach)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); @@ -9634,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...) 
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, const char *kfunc_name, size_t offset) { - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); + static int index = 0; + + snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, + __sync_fetch_and_add(&index, 1)); } static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, @@ -9735,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), func_name, offset); - legacy_probe = strdup(func_name); + legacy_probe = strdup(probe_name); if (!legacy_probe) return libbpf_err_ptr(-ENOMEM); @@ -10394,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return link; } -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, - void **copy_mem, size_t *copy_size, - bpf_perf_event_print_t fn, void *private_data) +static enum bpf_perf_event_ret +perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data) { struct perf_event_mmap_page *header = mmap_mem; __u64 data_head = ring_buffer_read_head(header); @@ -10442,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, return libbpf_err(ret); } +__attribute__((alias("perf_event_read_simple"))) +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, + void **copy_mem, size_t *copy_size, + bpf_perf_event_print_t fn, void *private_data); + struct perf_buffer; struct perf_buffer_params { @@ -10575,11 +10866,18 @@ error: static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p); -struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, - const struct perf_buffer_opts *opts) 
+DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) +struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, + perf_buffer_lost_fn lost_cb, + void *ctx, + const struct perf_buffer_opts *opts) { struct perf_buffer_params p = {}; - struct perf_event_attr attr = { 0, }; + struct perf_event_attr attr = {}; + + if (!OPTS_VALID(opts, perf_buffer_opts)) + return libbpf_err_ptr(-EINVAL); attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; @@ -10588,29 +10886,62 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.wakeup_events = 1; p.attr = &attr; - p.sample_cb = opts ? opts->sample_cb : NULL; - p.lost_cb = opts ? opts->lost_cb : NULL; - p.ctx = opts ? opts->ctx : NULL; + p.sample_cb = sample_cb; + p.lost_cb = lost_cb; + p.ctx = ctx; return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } -struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, - const struct perf_buffer_raw_opts *opts) +COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) +struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts) +{ + return perf_buffer__new_v0_6_0(map_fd, page_cnt, + opts ? opts->sample_cb : NULL, + opts ? opts->lost_cb : NULL, + opts ? 
opts->ctx : NULL, + NULL); +} + +DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) +struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, + struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts) { struct perf_buffer_params p = {}; - p.attr = opts->attr; - p.event_cb = opts->event_cb; - p.ctx = opts->ctx; - p.cpu_cnt = opts->cpu_cnt; - p.cpus = opts->cpus; - p.map_keys = opts->map_keys; + if (page_cnt == 0 || !attr) + return libbpf_err_ptr(-EINVAL); + + if (!OPTS_VALID(opts, perf_buffer_raw_opts)) + return libbpf_err_ptr(-EINVAL); + + p.attr = attr; + p.event_cb = event_cb; + p.ctx = ctx; + p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); + p.cpus = OPTS_GET(opts, cpus, NULL); + p.map_keys = OPTS_GET(opts, map_keys, NULL); return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } +COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) +struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts) +{ + LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, + .cpu_cnt = opts->cpu_cnt, + .cpus = opts->cpus, + .map_keys = opts->map_keys, + ); + + return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, + opts->event_cb, opts->ctx, &inner_opts); +} + static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { @@ -10810,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb, { enum bpf_perf_event_ret ret; - ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, - pb->page_size, &cpu_buf->buf, - &cpu_buf->buf_size, - perf_buffer__process_record, cpu_buf); + ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, + pb->page_size, &cpu_buf->buf, + &cpu_buf->buf_size, + perf_buffer__process_record, cpu_buf); if (ret != LIBBPF_PERF_EVENT_CONT) return ret; return 0; diff --git 
a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 9de0f299706b..8b9bc5e90c2b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -24,6 +24,10 @@ extern "C" { #endif +LIBBPF_API __u32 libbpf_major_version(void); +LIBBPF_API __u32 libbpf_minor_version(void); +LIBBPF_API const char *libbpf_version_string(void); + enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -104,12 +108,73 @@ struct bpf_object_open_opts { * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. */ const char *btf_custom_path; + /* Pointer to a buffer for storing kernel logs for applicable BPF + * commands. Valid kernel_log_size has to be specified as well and are + * passed-through to bpf() syscall. Keep in mind that kernel might + * fail operation with -ENOSPC error if provided buffer is too small + * to contain entire log output. + * See the comment below for kernel_log_level for interaction between + * log_buf and log_level settings. + * + * If specified, this log buffer will be passed for: + * - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden + * with bpf_program__set_log() on per-program level, to get + * BPF verifier log output. + * - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get + * BTF sanity checking log. + * + * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite + * previous contents, so if you need more fine-grained control, set + * per-program buffer with bpf_program__set_log_buf() to preserve each + * individual program's verification log. Keep using kernel_log_buf + * for BTF verification log, if necessary. + */ + char *kernel_log_buf; + size_t kernel_log_size; + /* + * Log level can be set independently from log buffer. Log_level=0 + * means that libbpf will attempt loading BTF or program without any + * logging requested, but will retry with either its own or custom log + * buffer, if provided, and log_level=1 on any error. 
+ * And vice versa, setting log_level>0 will request BTF or prog + * loading with verbose log from the first attempt (and as such also + * for successfully loaded BTF or program), and the actual log buffer + * could be either libbpf's own auto-allocated log buffer, if + * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf. + * If user didn't provide custom log buffer, libbpf will emit captured + * logs through its print callback. + */ + __u32 kernel_log_level; + + size_t :0; }; -#define bpf_object_open_opts__last_field btf_custom_path +#define bpf_object_open_opts__last_field kernel_log_level LIBBPF_API struct bpf_object *bpf_object__open(const char *path); + +/** + * @brief **bpf_object__open_file()** creates a bpf_object by opening + * the BPF ELF object file pointed to by the passed path and loading it + * into memory. + * @param path BPF object file path + * @param opts options for how to load the bpf object, this parameter is + * optional and can be set to NULL + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); + +/** + * @brief **bpf_object__open_mem()** creates a bpf_object by reading + * the BPF objects raw bytes from a memory buffer containing a valid + * BPF ELF object file. 
+ * @param obj_buf pointer to the buffer containing ELF file bytes + * @param obj_buf_sz number of bytes in the buffer + * @param opts options for how to load the bpf object + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); @@ -149,6 +214,7 @@ struct bpf_object_load_attr { /* Load/unload object into/from kernel */ LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead") LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead") LIBBPF_API int bpf_object__unload(struct bpf_object *obj); @@ -161,6 +227,7 @@ struct btf; LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead") LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title); @@ -262,8 +329,8 @@ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *p */ LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); -LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, - __u32 kern_version); +LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") +LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated") LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, @@ -273,7 +340,31 @@ LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated" LIBBPF_API 
int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance); + +/** + * @brief **bpf_program__pin()** pins the BPF program to a file + * in the BPF FS specified by a path. This increments the programs + * reference count, allowing it to stay loaded after the process + * which loaded it has exited. + * + * @param prog BPF program to pin, must already be loaded + * @param path file path in a BPF file system + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); + +/** + * @brief **bpf_program__unpin()** unpins the BPF program from a file + * in the BPFFS specified by a path. This decrements the programs + * reference count. + * + * The file pinning the BPF program can also be unlinked by a different + * process in which case this function will return an error. + * + * @param prog BPF program to unpin + * @param path file path to the pin in a BPF file system + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); @@ -344,10 +435,41 @@ struct bpf_uprobe_opts { }; #define bpf_uprobe_opts__last_field retprobe +/** + * @brief **bpf_program__attach_uprobe()** attaches a BPF program + * to the userspace function which is found by binary path and + * offset. You can optionally specify a particular proccess to attach + * to. You can also optionally attach the program to the function + * exit instead of entry. 
+ * + * @param prog BPF program to attach + * @param retprobe Attach to function exit + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains the function symbol + * @param func_offset Offset within the binary of the function symbol + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); + +/** + * @brief **bpf_program__attach_uprobe_opts()** is just like + * bpf_program__attach_uprobe() except with a options struct + * for various configurations. + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains the function symbol + * @param func_offset Offset within the binary of the function symbol + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const char *binary_path, size_t func_offset, @@ -431,7 +553,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, * one instance. In this case bpf_program__fd(prog) is equal to * bpf_program__nth_fd(prog, 0). */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions") struct bpf_prog_prep_result { /* * If not NULL, load new instruction array. 
@@ -494,6 +615,18 @@ LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); +LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); + +/* Per-program log level and log buffer getters/setters. + * See bpf_object_open_opts comments regarding log_level and log_buf + * interactions. + */ +LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level); +LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); +LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); + LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); @@ -544,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name); * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. 
*/ -LIBBPF_API struct bpf_map * +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") +struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") @@ -611,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead") LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); /** @@ -674,10 +809,12 @@ struct bpf_prog_load_attr { int prog_flags; }; +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead") LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_object **pobj, int *prog_fd); -LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") +LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); /* XDP related API */ struct xdp_link_info { @@ -775,18 +912,52 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt); /* common use perf buffer options */ struct perf_buffer_opts { - /* if specified, sample_cb is called for each sample */ - perf_buffer_sample_fn sample_cb; - /* if specified, lost_cb is called for each batch of lost samples */ - perf_buffer_lost_fn lost_cb; - /* ctx is provided to sample_cb and lost_cb */ - void *ctx; + union { + size_t sz; + struct { /* DEPRECATED: will be removed in v1.0 */ + /* if specified, sample_cb is called for each sample */ + perf_buffer_sample_fn sample_cb; + /* if specified, lost_cb is called for each batch of lost samples */ + 
perf_buffer_lost_fn lost_cb; + /* ctx is provided to sample_cb and lost_cb */ + void *ctx; + }; + }; }; +#define perf_buffer_opts__last_field sz +/** + * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified + * BPF_PERF_EVENT_ARRAY map + * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF + * code to send data over to user-space + * @param page_cnt number of memory pages allocated for each per-CPU buffer + * @param sample_cb function called on each received data record + * @param lost_cb function called when record loss has occurred + * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* + * @return a new instance of struct perf_buffer on success, NULL on error with + * *errno* containing an error code + */ LIBBPF_API struct perf_buffer * perf_buffer__new(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, const struct perf_buffer_opts *opts); +LIBBPF_API struct perf_buffer * +perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, + perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, + const struct perf_buffer_opts *opts); + +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") +struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, + const struct perf_buffer_opts *opts); + +#define perf_buffer__new(...) 
___libbpf_overload(___perf_buffer_new, __VA_ARGS__) +#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ + perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) +#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ + perf_buffer__new_deprecated(map_fd, page_cnt, opts) + enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, LIBBPF_PERF_EVENT_ERROR = -1, @@ -800,12 +971,21 @@ typedef enum bpf_perf_event_ret /* raw perf buffer options, giving most power and control */ struct perf_buffer_raw_opts { - /* perf event attrs passed directly into perf_event_open() */ - struct perf_event_attr *attr; - /* raw event callback */ - perf_buffer_event_fn event_cb; - /* ctx is provided to event_cb */ - void *ctx; + union { + struct { + size_t sz; + long :0; + long :0; + }; + struct { /* DEPRECATED: will be removed in v1.0 */ + /* perf event attrs passed directly into perf_event_open() */ + struct perf_event_attr *attr; + /* raw event callback */ + perf_buffer_event_fn event_cb; + /* ctx is provided to event_cb */ + void *ctx; + }; + }; /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of * max_entries of given PERF_EVENT_ARRAY map) */ @@ -815,11 +995,28 @@ struct perf_buffer_raw_opts { /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */ int *map_keys; }; +#define perf_buffer_raw_opts__last_field map_keys LIBBPF_API struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, +perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, const struct perf_buffer_raw_opts *opts); +LIBBPF_API struct perf_buffer * +perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, + perf_buffer_event_fn event_cb, void *ctx, + const struct perf_buffer_raw_opts *opts); + +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") +struct perf_buffer *perf_buffer__new_raw_deprecated(int 
map_fd, size_t page_cnt, + const struct perf_buffer_raw_opts *opts); + +#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) +#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ + perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) +#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ + perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) + LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb); LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); @@ -831,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr, void *private_data); +LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead") LIBBPF_API enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -857,13 +1055,57 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, * user, causing subsequent probes to fail. In this case, the caller may want * to adjust that limit with setrlimit(). 
*/ -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, - __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead") LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, - enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection") LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); +/** + * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports + * BPF programs of a given type. + * @param prog_type BPF program type to detect kernel support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given program type is supported; 0, if given program type is + * not supported; negative error code if feature detection failed or can't be + * performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts); +/** + * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports + * BPF maps of a given type. + * @param map_type BPF map type to detect kernel support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given map type is supported; 0, if given map type is + * not supported; negative error code if feature detection failed or can't be + * performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. 
+ */ +LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts); +/** + * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the + * use of a given BPF helper from specified BPF program type. + * @param prog_type BPF program type used to check the support of BPF helper + * @param helper_id BPF helper ID (enum bpf_func_id) to check support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given combination of program type and helper is supported; 0, + * if the combination is not supported; negative error code if feature + * detection for provided input arguments failed or can't be performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, + enum bpf_func_id helper_id, const void *opts); + /* * Get bpf_prog_info in continuous memory * @@ -918,12 +1160,15 @@ struct bpf_prog_info_linear { __u8 data[]; }; +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API struct bpf_prog_info_linear * bpf_program__get_prog_info_linear(int fd, __u64 arrays); +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper") LIBBPF_API void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); @@ -965,11 +1210,11 @@ struct bpf_object_skeleton { struct bpf_object **obj; int map_cnt; - int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ + int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */ struct bpf_map_skeleton *maps; int prog_cnt; - int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ + int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */ struct bpf_prog_skeleton *progs; }; diff --git a/tools/lib/bpf/libbpf.map 
b/tools/lib/bpf/libbpf.map index 43580eb47740..529783967793 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -391,14 +391,44 @@ LIBBPF_0.6.0 { global: bpf_map__map_extra; bpf_map__set_map_extra; + bpf_map_create; bpf_object__next_map; bpf_object__next_program; bpf_object__prev_map; bpf_object__prev_program; + bpf_prog_load_deprecated; + bpf_prog_load; + bpf_program__flags; bpf_program__insn_cnt; bpf_program__insns; + bpf_program__set_flags; btf__add_btf; btf__add_decl_tag; + btf__add_type_tag; + btf__dedup; + btf__dedup_deprecated; btf__raw_data; btf__type_cnt; + btf_dump__new; + btf_dump__new_deprecated; + libbpf_major_version; + libbpf_minor_version; + libbpf_version_string; + perf_buffer__new; + perf_buffer__new_deprecated; + perf_buffer__new_raw; + perf_buffer__new_raw_deprecated; } LIBBPF_0.5.0; + +LIBBPF_0.7.0 { + global: + bpf_btf_load; + bpf_program__log_buf; + bpf_program__log_level; + bpf_program__set_log_buf; + bpf_program__set_log_level; + libbpf_probe_bpf_helper; + libbpf_probe_bpf_map_type; + libbpf_probe_bpf_prog_type; + libbpf_set_memlock_rlim_max; +}; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index aaa1efbf6f51..000e37798ff2 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -40,6 +40,23 @@ #else #define __LIBBPF_MARK_DEPRECATED_0_7(X) #endif +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) +#define __LIBBPF_MARK_DEPRECATED_0_8(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_8(X) +#endif + +/* This set of internal macros allows to do "function overloading" based on + * number of arguments provided by the user in a backwards-compatible way during the + * transition to libbpf 1.0 + * It's ugly but necessary evil that will be cleaned up when we get to 1.0. + * See bpf_prog_load() overload for example. + */ +#define ___libbpf_cat(A, B) A ## B +#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM) +#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) 
N +#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1) +#define ___libbpf_overload(NAME, ...) ___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__) /* Helper macro to declare and initialize libbpf options struct * @@ -54,7 +71,7 @@ * including any extra padding, it with memset() and then assigns initial * values provided by users in struct initializer-syntax as varargs. */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ +#define LIBBPF_OPTS(TYPE, NAME, ...) \ struct TYPE NAME = ({ \ memset(&NAME, 0, sizeof(struct TYPE)); \ (struct TYPE) { \ diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index aeb79e3a8ff9..1565679eb432 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -73,6 +73,8 @@ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz) #define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) +#define BTF_TYPE_TYPE_TAG_ENC(value, type) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) #ifndef likely #define likely(x) __builtin_expect(!!(x), 1) @@ -167,10 +169,31 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) return realloc(ptr, total); } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. 
+ */ +static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + +__u32 get_kernel_version(void); + struct btf; struct btf_type; -struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id); +struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id); const char *btf_kind_str(const struct btf_type *t); const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); @@ -270,63 +293,51 @@ static inline bool libbpf_validate_opts(const char *opts, (opts)->sz - __off); \ }) +enum kern_feature_id { + /* v4.14: kernel support for program & map names. */ + FEAT_PROG_NAME, + /* v5.2: kernel support for global data sections. */ + FEAT_GLOBAL_DATA, + /* BTF support */ + FEAT_BTF, + /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ + FEAT_BTF_FUNC, + /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ + FEAT_BTF_DATASEC, + /* BTF_FUNC_GLOBAL is supported */ + FEAT_BTF_GLOBAL_FUNC, + /* BPF_F_MMAPABLE is supported for arrays */ + FEAT_ARRAY_MMAP, + /* kernel support for expected_attach_type in BPF_PROG_LOAD */ + FEAT_EXP_ATTACH_TYPE, + /* bpf_probe_read_{kernel,user}[_str] helpers */ + FEAT_PROBE_READ_KERN, + /* BPF_PROG_BIND_MAP is supported */ + FEAT_PROG_BIND_MAP, + /* Kernel support for module BTFs */ + FEAT_MODULE_BTF, + /* BTF_KIND_FLOAT support */ + FEAT_BTF_FLOAT, + /* BPF perf link support */ + FEAT_PERF_LINK, + /* BTF_KIND_DECL_TAG support */ + FEAT_BTF_DECL_TAG, + /* BTF_KIND_TYPE_TAG support */ + FEAT_BTF_TYPE_TAG, + /* memcg-based accounting for BPF maps and progs */ + FEAT_MEMCG_ACCOUNT, + __FEAT_CNT, +}; + +int probe_memcg_account(void); +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int 
*mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); - -struct bpf_prog_load_params { - enum bpf_prog_type prog_type; - enum bpf_attach_type expected_attach_type; - const char *name; - const struct bpf_insn *insns; - size_t insn_cnt; - const char *license; - __u32 kern_version; - __u32 attach_prog_fd; - __u32 attach_btf_obj_fd; - __u32 attach_btf_id; - __u32 prog_ifindex; - __u32 prog_btf_fd; - __u32 prog_flags; - - __u32 func_info_rec_size; - const void *func_info; - __u32 func_info_cnt; - - __u32 line_info_rec_size; - const void *line_info; - __u32 line_info_cnt; - - __u32 log_level; - char *log_buf; - size_t log_buf_sz; - int *fd_array; -}; - -int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); - -struct bpf_create_map_params { - const char *name; - enum bpf_map_type map_type; - __u32 map_flags; - __u32 key_size; - __u32 value_size; - __u32 max_entries; - __u32 numa_node; - __u32 btf_fd; - __u32 btf_key_type_id; - __u32 btf_value_type_id; - __u32 map_ifindex; - union { - __u32 inner_map_fd; - __u32 btf_vmlinux_value_type_id; - }; - __u64 map_extra; -}; - -int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 5ba5c9beccfa..79131f761a27 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -45,7 +45,6 @@ enum libbpf_strict_mode { * (positive) error code. */ LIBBPF_STRICT_DIRECT_ERRS = 0x02, - /* * Enforce strict BPF program section (SEC()) names. * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were @@ -63,12 +62,24 @@ enum libbpf_strict_mode { * Clients can maintain it on their own if it is valuable for them. 
*/ LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, + /* + * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the + * first BPF program or map creation operation. This is done only if + * kernel is too old to support memcg-based memory accounting for BPF + * subsystem. By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, + * but it can be overridden with libbpf_set_memlock_rlim_max() API. + * Note that libbpf_set_memlock_rlim_max() needs to be called before + * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() + * operation. + */ + LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, __LIBBPF_STRICT_LAST, }; LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); +#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 68f2dbf364aa..97b06cede56f 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -48,41 +48,65 @@ static int get_vendor_id(int ifindex) return strtol(buf, NULL, 0); } -static int get_kernel_version(void) +static int probe_prog_load(enum bpf_prog_type prog_type, + const struct bpf_insn *insns, size_t insns_cnt, + char *log_buf, size_t log_buf_sz, + __u32 ifindex) { - int version, subversion, patchlevel; - struct utsname utsn; - - /* Return 0 on failure, and attempt to probe with empty kversion */ - if (uname(&utsn)) - return 0; - - if (sscanf(utsn.release, "%d.%d.%d", - &version, &subversion, &patchlevel) != 3) - return 0; - - return (version << 16) + (subversion << 8) + patchlevel; -} - -static void -probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, - size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) -{ - struct bpf_load_program_attr xattr = {}; - int fd; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_buf = log_buf, + .log_size = log_buf_sz, + .log_level = log_buf ? 
1 : 0, + .prog_ifindex = ifindex, + ); + int fd, err, exp_err = 0; + const char *exp_msg = NULL; + char buf[4096]; switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT; break; case BPF_PROG_TYPE_CGROUP_SOCKOPT: - xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; + opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT; break; case BPF_PROG_TYPE_SK_LOOKUP: - xattr.expected_attach_type = BPF_SK_LOOKUP; + opts.expected_attach_type = BPF_SK_LOOKUP; break; case BPF_PROG_TYPE_KPROBE: - xattr.kern_version = get_kernel_version(); + opts.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_LIRC_MODE2: + opts.expected_attach_type = BPF_LIRC_MODE2; + break; + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_LSM: + opts.log_buf = buf; + opts.log_size = sizeof(buf); + opts.log_level = 1; + if (prog_type == BPF_PROG_TYPE_TRACING) + opts.expected_attach_type = BPF_TRACE_FENTRY; + else + opts.expected_attach_type = BPF_MODIFY_RETURN; + opts.attach_btf_id = 1; + + exp_err = -EINVAL; + exp_msg = "attach_btf_id 1 is not a function"; + break; + case BPF_PROG_TYPE_EXT: + opts.log_buf = buf; + opts.log_size = sizeof(buf); + opts.log_level = 1; + opts.attach_btf_id = 1; + + exp_err = -EINVAL; + exp_msg = "Cannot replace kernel functions"; + break; + case BPF_PROG_TYPE_SYSCALL: + opts.prog_flags = BPF_F_SLEEPABLE; + break; + case BPF_PROG_TYPE_STRUCT_OPS: + exp_err = -524; /* -ENOTSUPP */ break; case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_SOCKET_FILTER: @@ -103,27 +127,42 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_LWT_SEG6LOCAL: - case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_TRACING: - case BPF_PROG_TYPE_STRUCT_OPS: - case 
BPF_PROG_TYPE_EXT: - case BPF_PROG_TYPE_LSM: - default: break; + default: + return -EOPNOTSUPP; } - xattr.prog_type = prog_type; - xattr.insns = insns; - xattr.insns_cnt = insns_cnt; - xattr.license = "GPL"; - xattr.prog_ifindex = ifindex; - - fd = bpf_load_program_xattr(&xattr, buf, buf_len); + fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); + err = -errno; if (fd >= 0) close(fd); + if (exp_err) { + if (fd >= 0 || err != exp_err) + return 0; + if (exp_msg && !strstr(buf, exp_msg)) + return 0; + return 1; + } + return fd >= 0 ? 1 : 0; +} + +int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN() + }; + const size_t insn_cnt = ARRAY_SIZE(insns); + int ret; + + if (opts) + return libbpf_err(-EINVAL); + + ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); + return libbpf_err(ret); } bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) @@ -133,12 +172,16 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) BPF_EXIT_INSN() }; + /* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ + if (ifindex == 0) + return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) /* nfp returns -EINVAL on exit(0) with TC offload */ insns[0].imm = 2; errno = 0; - probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); return errno != EINVAL && errno != EOPNOTSUPP; } @@ -166,7 +209,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); + btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); free(raw_btf); return btf_fd; @@ -199,17 +242,18 @@ static int load_local_storage_btf(void) 
strs, sizeof(strs)); } -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex) { - int key_size, value_size, max_entries, map_flags; + LIBBPF_OPTS(bpf_map_create_opts, opts); + int key_size, value_size, max_entries; __u32 btf_key_type_id = 0, btf_value_type_id = 0; - struct bpf_create_map_attr attr = {}; - int fd = -1, btf_fd = -1, fd_inner; + int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; + + opts.map_ifindex = ifindex; key_size = sizeof(__u32); value_size = sizeof(__u32); max_entries = 1; - map_flags = 0; switch (map_type) { case BPF_MAP_TYPE_STACK_TRACE: @@ -218,7 +262,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_LPM_TRIE: key_size = sizeof(__u64); value_size = sizeof(__u64); - map_flags = BPF_F_NO_PREALLOC; + opts.map_flags = BPF_F_NO_PREALLOC; break; case BPF_MAP_TYPE_CGROUP_STORAGE: case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: @@ -237,17 +281,25 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) btf_value_type_id = 3; value_size = 8; max_entries = 0; - map_flags = BPF_F_NO_PREALLOC; + opts.map_flags = BPF_F_NO_PREALLOC; btf_fd = load_local_storage_btf(); if (btf_fd < 0) - return false; + return btf_fd; break; case BPF_MAP_TYPE_RINGBUF: key_size = 0; value_size = 0; max_entries = 4096; break; - case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_STRUCT_OPS: + /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ + opts.btf_vmlinux_value_type_id = 1; + exp_err = -524; /* -ENOTSUPP */ + break; + case BPF_MAP_TYPE_BLOOM_FILTER: + key_size = 0; + max_entries = 1; + break; case BPF_MAP_TYPE_HASH: case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PROG_ARRAY: @@ -266,9 +318,10 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: - case BPF_MAP_TYPE_STRUCT_OPS: - default: break; + case BPF_MAP_TYPE_UNSPEC: + 
default: + return -EOPNOTSUPP; } if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -277,37 +330,102 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) * map-in-map for offload */ if (ifindex) - return false; + goto cleanup; - fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(__u32), sizeof(__u32), 1, 0); + fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, + sizeof(__u32), sizeof(__u32), 1, NULL); if (fd_inner < 0) - return false; - fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), - fd_inner, 1, 0); - close(fd_inner); - } else { - /* Note: No other restriction on map type probes for offload */ - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.max_entries = max_entries; - attr.map_flags = map_flags; - attr.map_ifindex = ifindex; - if (btf_fd >= 0) { - attr.btf_fd = btf_fd; - attr.btf_key_type_id = btf_key_type_id; - attr.btf_value_type_id = btf_value_type_id; - } + goto cleanup; + + opts.inner_map_fd = fd_inner; + } - fd = bpf_create_map_xattr(&attr); + if (btf_fd >= 0) { + opts.btf_fd = btf_fd; + opts.btf_key_type_id = btf_key_type_id; + opts.btf_value_type_id = btf_value_type_id; } + + fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); + err = -errno; + +cleanup: if (fd >= 0) close(fd); + if (fd_inner >= 0) + close(fd_inner); if (btf_fd >= 0) close(btf_fd); - return fd >= 0; + if (exp_err) + return fd < 0 && err == exp_err ? 1 : 0; + else + return fd >= 0 ? 
1 : 0; +} + +int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) +{ + int ret; + + if (opts) + return libbpf_err(-EINVAL); + + ret = probe_map_create(map_type, 0); + return libbpf_err(ret); +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ + return probe_map_create(map_type, ifindex) == 1; +} + +int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, + const void *opts) +{ + struct bpf_insn insns[] = { + BPF_EMIT_CALL((__u32)helper_id), + BPF_EXIT_INSN(), + }; + const size_t insn_cnt = ARRAY_SIZE(insns); + char buf[4096]; + int ret; + + if (opts) + return libbpf_err(-EINVAL); + + /* we can't successfully load all prog types to check for BPF helper + * support, so bail out with -EOPNOTSUPP error + */ + switch (prog_type) { + case BPF_PROG_TYPE_TRACING: + case BPF_PROG_TYPE_EXT: + case BPF_PROG_TYPE_LSM: + case BPF_PROG_TYPE_STRUCT_OPS: + return -EOPNOTSUPP; + default: + break; + } + + buf[0] = '\0'; + ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); + if (ret < 0) + return libbpf_err(ret); + + /* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) + * at all, it will emit something like "invalid func unknown#181". + * If BPF verifier recognizes BPF helper but it's not supported for + * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". + * In both cases, provided combination of BPF program type and BPF + * helper is not supported by the kernel. + * In all other cases, probe_prog_load() above will either succeed (e.g., + * because BPF helper happens to accept no input arguments or it + * accepts one input argument and initial PTR_TO_CTX is fine for + * that), or we'll get some more specific BPF verifier error about + * some unsatisfied conditions. 
+ */ + if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) + return 0; + return 1; /* assume supported */ } bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, @@ -320,8 +438,7 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, char buf[4096] = {}; bool res; - probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), - ifindex); + probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex); res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); if (ifindex) { @@ -353,8 +470,8 @@ bool bpf_probe_large_insn_limit(__u32 ifindex) insns[BPF_MAXINSNS] = BPF_EXIT_INSN(); errno = 0; - probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, - ifindex); + probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, + ifindex); return errno != E2BIG && errno != EINVAL; } diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index dd56d76f291c..0fefefc3500b 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 6 +#define LIBBPF_MINOR_VERSION 7 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index f677dccdeae4..9aa016fb55aa 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -210,6 +210,7 @@ void bpf_linker__free(struct bpf_linker *linker) } free(linker->secs); + free(linker->glob_syms); free(linker); } @@ -1999,7 +2000,7 @@ add_sym: static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj) { struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; - struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx]; + struct dst_sec *dst_symtab; int i, err; for (i = 1; i < obj->sec_cnt; i++) { @@ -2032,6 +2033,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob 
return -1; } + /* add_dst_sec() above could have invalidated linker->secs */ + dst_symtab = &linker->secs[linker->symtab_sec_idx]; + /* shdr->sh_link points to SYMTAB */ dst_sec->shdr->sh_link = linker->symtab_sec_idx; @@ -2650,6 +2654,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; int i, j, id, err; @@ -2686,7 +2691,8 @@ static int finalize_btf(struct bpf_linker *linker) return err; } - err = btf__dedup(linker->btf, linker->btf_ext, NULL); + opts.btf_ext = linker->btf_ext; + err = btf__dedup(linker->btf, &opts); if (err) { pr_warn("BTF dedup failed: %d\n", err); return err; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index b5b8956a1be8..910865e29edc 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1,6 +1,60 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2019 Facebook */ +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> +#include "relo_core.h" + +static const char *btf_kind_str(const struct btf_type *t) +{ + return btf_type_str(t); +} + +static bool is_ldimm64_insn(struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + +static const struct btf_type * +skip_mods_and_typedefs(const struct btf *btf, u32 id, u32 *res_id) +{ + return btf_type_skip_modifiers(btf, id, res_id); +} + +static const char *btf__name_by_offset(const struct btf *btf, u32 offset) +{ + return btf_name_by_offset(btf, offset); +} + +static s64 btf__resolve_size(const struct btf *btf, u32 type_id) +{ + const struct btf_type *t; + int size; + + t = btf_type_by_id(btf, type_id); + t = btf_resolve_size(btf, t, &size); + if (IS_ERR(t)) + return PTR_ERR(t); + return size; +} + +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, 
+}; + +#undef pr_warn +#undef pr_info +#undef pr_debug +#define pr_warn(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define pr_info(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define pr_debug(fmt, log, ...) bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define libbpf_print(level, fmt, ...) bpf_log((void *)prog_name, fmt, ##__VA_ARGS__) +#else #include <stdio.h> #include <string.h> #include <errno.h> @@ -12,33 +66,7 @@ #include "btf.h" #include "str_error.h" #include "libbpf_internal.h" - -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { - __u32 type_id; /* struct/union type or array element type */ - __u32 idx; /* field index or array index */ - const char *name; /* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { - const struct btf *btf; - /* high-level spec: named fields and array indices only */ - struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; - /* original unresolved (no skip_mods_or_typedefs) root type ID */ - __u32 root_type_id; - /* CO-RE relocation kind */ - enum bpf_core_relo_kind relo_kind; - /* high-level spec length */ - int len; - /* raw, low-level spec: 1-to-1 with accessor spec string */ - int raw_spec[BPF_CORE_SPEC_MAX_LEN]; - /* raw spec length */ - int raw_len; - /* field bit offset represented by spec */ - __u32 bit_offset; -}; +#endif static bool is_flex_arr(const struct btf *btf, const struct bpf_core_accessor *acc, @@ -51,25 +79,25 @@ static bool is_flex_arr(const struct btf *btf, return false; /* has to be the last member of enclosing struct */ - t = btf__type_by_id(btf, acc->type_id); + t = btf_type_by_id(btf, acc->type_id); return acc->idx == btf_vlen(t) - 1; } static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_FIELD_BYTE_OFFSET: return "byte_off"; - case BPF_FIELD_BYTE_SIZE: return "byte_sz"; - case BPF_FIELD_EXISTS: return "field_exists"; - case 
BPF_FIELD_SIGNED: return "signed"; - case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; - case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; - case BPF_TYPE_ID_LOCAL: return "local_type_id"; - case BPF_TYPE_ID_TARGET: return "target_type_id"; - case BPF_TYPE_EXISTS: return "type_exists"; - case BPF_TYPE_SIZE: return "type_size"; - case BPF_ENUMVAL_EXISTS: return "enumval_exists"; - case BPF_ENUMVAL_VALUE: return "enumval_value"; + case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_CORE_FIELD_EXISTS: return "field_exists"; + case BPF_CORE_FIELD_SIGNED: return "signed"; + case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; + case BPF_CORE_TYPE_EXISTS: return "type_exists"; + case BPF_CORE_TYPE_SIZE: return "type_size"; + case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_CORE_ENUMVAL_VALUE: return "enumval_value"; default: return "unknown"; } } @@ -77,12 +105,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_FIELD_BYTE_OFFSET: - case BPF_FIELD_BYTE_SIZE: - case BPF_FIELD_EXISTS: - case BPF_FIELD_SIGNED: - case BPF_FIELD_LSHIFT_U64: - case BPF_FIELD_RSHIFT_U64: + case BPF_CORE_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_EXISTS: + case BPF_CORE_FIELD_SIGNED: + case BPF_CORE_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: return true; default: return false; @@ -92,10 +120,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_TYPE_ID_LOCAL: - case BPF_TYPE_ID_TARGET: - case BPF_TYPE_EXISTS: - case BPF_TYPE_SIZE: + case BPF_CORE_TYPE_ID_LOCAL: + case 
BPF_CORE_TYPE_ID_TARGET: + case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_SIZE: return true; default: return false; @@ -105,8 +133,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) { switch (kind) { - case BPF_ENUMVAL_EXISTS: - case BPF_ENUMVAL_VALUE: + case BPF_CORE_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_VALUE: return true; default: return false; @@ -150,7 +178,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access * string to specify enumerator's value index that need to be relocated. */ -static int bpf_core_parse_spec(const struct btf *btf, +static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, __u32 type_id, const char *spec_str, enum bpf_core_relo_kind relo_kind, @@ -272,8 +300,8 @@ static int bpf_core_parse_spec(const struct btf *btf, return sz; spec->bit_offset += access_idx * sz * 8; } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - type_id, spec_str, i, id, btf_kind_str(t)); + pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + prog_name, type_id, spec_str, i, id, btf_kind_str(t)); return -EINVAL; } } @@ -346,8 +374,6 @@ recur: targ_id = btf_array(targ_type)->type; goto recur; default: - pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); return 0; } } @@ -388,7 +414,7 @@ static int bpf_core_match_member(const struct btf *local_btf, return 0; local_id = local_acc->type_id; - local_type = btf__type_by_id(local_btf, local_id); + local_type = btf_type_by_id(local_btf, local_id); local_member = btf_members(local_type) + local_acc->idx; local_name = btf__name_by_offset(local_btf, local_member->name_off); @@ -571,7 +597,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, *field_sz = 0; - if 
(relo->kind == BPF_FIELD_EXISTS) { + if (relo->kind == BPF_CORE_FIELD_EXISTS) { *val = spec ? 1 : 0; return 0; } @@ -580,11 +606,11 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EUCLEAN; /* request instruction poisoning */ acc = &spec->spec[spec->len - 1]; - t = btf__type_by_id(spec->btf, acc->type_id); + t = btf_type_by_id(spec->btf, acc->type_id); /* a[n] accessor needs special handling */ if (!acc->name) { - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; /* remember field size for load/store mem size */ sz = btf__resolve_size(spec->btf, acc->type_id); @@ -592,7 +618,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, return -EINVAL; *field_sz = sz; *type_id = acc->type_id; - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + } else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) { sz = btf__resolve_size(spec->btf, acc->type_id); if (sz < 0) return -EINVAL; @@ -644,36 +670,36 @@ static int bpf_core_calc_field_relo(const char *prog_name, *validate = !bitfield; switch (relo->kind) { - case BPF_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { *field_sz = byte_sz; *type_id = field_type_id; } break; - case BPF_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_BYTE_SIZE: *val = byte_sz; break; - case BPF_FIELD_SIGNED: + case BPF_CORE_FIELD_SIGNED: /* enums will be assumed unsigned */ *val = btf_is_enum(mt) || (btf_int_encoding(mt) & BTF_INT_SIGNED); if (validate) *validate = true; /* signedness is never ambiguous */ break; - case BPF_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_LSHIFT_U64: #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ *val = 64 - (bit_off + bit_sz - byte_off * 8); #else *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); #endif break; - case BPF_FIELD_RSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: *val = 64 - bit_sz; if (validate) *validate = true; /* right shift is never ambiguous */ break; - case BPF_FIELD_EXISTS: + case 
BPF_CORE_FIELD_EXISTS: default: return -EOPNOTSUPP; } @@ -683,10 +709,14 @@ static int bpf_core_calc_field_relo(const char *prog_name, static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, const struct bpf_core_spec *spec, - __u32 *val) + __u32 *val, bool *validate) { __s64 sz; + /* by default, always check expected value in bpf_insn */ + if (validate) + *validate = true; + /* type-based relos return zero when target type is not found */ if (!spec) { *val = 0; @@ -694,20 +724,25 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, } switch (relo->kind) { - case BPF_TYPE_ID_TARGET: + case BPF_CORE_TYPE_ID_TARGET: *val = spec->root_type_id; + /* type ID, embedded in bpf_insn, might change during linking, + * so enforcing it is pointless + */ + if (validate) + *validate = false; break; - case BPF_TYPE_EXISTS: + case BPF_CORE_TYPE_EXISTS: *val = 1; break; - case BPF_TYPE_SIZE: + case BPF_CORE_TYPE_SIZE: sz = btf__resolve_size(spec->btf, spec->root_type_id); if (sz < 0) return -EINVAL; *val = sz; break; - case BPF_TYPE_ID_LOCAL: - /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + case BPF_CORE_TYPE_ID_LOCAL: + /* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */ default: return -EOPNOTSUPP; } @@ -723,13 +758,13 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, const struct btf_enum *e; switch (relo->kind) { - case BPF_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_EXISTS: *val = spec ? 
1 : 0; break; - case BPF_ENUMVAL_VALUE: + case BPF_CORE_ENUMVAL_VALUE: if (!spec) return -EUCLEAN; /* request instruction poisoning */ - t = btf__type_by_id(spec->btf, spec->spec[0].type_id); + t = btf_type_by_id(spec->btf, spec->spec[0].type_id); e = btf_enum(t) + spec->spec[0].idx; *val = e->val; break; @@ -805,8 +840,8 @@ static int bpf_core_calc_relo(const char *prog_name, if (res->orig_sz != res->new_sz) { const struct btf_type *orig_t, *new_t; - orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); - new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf_type_by_id(targ_spec->btf, res->new_type_id); /* There are two use cases in which it's safe to * adjust load/store's mem size: @@ -835,8 +870,8 @@ static int bpf_core_calc_relo(const char *prog_name, res->fail_memsz_adjust = true; } } else if (core_relo_is_type_based(relo->kind)) { - err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); - err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL); } else if (core_relo_is_enumval_based(relo->kind)) { err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); @@ -1045,7 +1080,7 @@ poison: * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec) { const struct btf_type *t; const struct btf_enum *e; @@ -1054,7 +1089,7 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) int i; type_id = spec->root_type_id; - t = 
btf__type_by_id(spec->btf, type_id); + t = btf_type_by_id(spec->btf, type_id); s = btf__name_by_offset(spec->btf, t->name_off); libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); @@ -1147,9 +1182,12 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, - struct bpf_core_cand_list *cands) + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch) { - struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; + struct bpf_core_spec *local_spec = &specs_scratch[0]; + struct bpf_core_spec *cand_spec = &specs_scratch[1]; + struct bpf_core_spec *targ_spec = &specs_scratch[2]; struct bpf_core_relo_res cand_res, targ_res; const struct btf_type *local_type; const char *local_name; @@ -1158,10 +1196,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, int i, j, err; local_id = relo->type_id; - local_type = btf__type_by_id(local_btf, local_id); - if (!local_type) - return -EINVAL; - + local_type = btf_type_by_id(local_btf, local_id); local_name = btf__name_by_offset(local_btf, local_type->name_off); if (!local_name) return -EINVAL; @@ -1170,7 +1205,8 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, if (str_is_empty(spec_str)) return -EINVAL; - err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); + err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, + relo->kind, local_spec); if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", prog_name, relo_idx, local_id, btf_kind_str(local_type), @@ -1181,15 +1217,17 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + bpf_core_dump_spec(prog_name, 
LIBBPF_DEBUG, local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ - if (relo->kind == BPF_TYPE_ID_LOCAL) { - targ_res.validate = true; + if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { + /* bpf_insn's imm value could get out of sync during linking */ + memset(&targ_res, 0, sizeof(targ_res)); + targ_res.validate = false; targ_res.poison = false; - targ_res.orig_val = local_spec.root_type_id; - targ_res.new_val = local_spec.root_type_id; + targ_res.orig_val = local_spec->root_type_id; + targ_res.new_val = local_spec->root_type_id; goto patch_insn; } @@ -1200,40 +1238,39 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, return -EOPNOTSUPP; } - for (i = 0, j = 0; i < cands->len; i++) { - err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, - cands->cands[i].id, &cand_spec); + err = bpf_core_spec_match(local_spec, cands->cands[i].btf, + cands->cands[i].id, cand_spec); if (err < 0) { pr_warn("prog '%s': relo #%d: error matching candidate #%d ", prog_name, relo_idx, i); - bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); + bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec); libbpf_print(LIBBPF_WARN, ": %d\n", err); return err; } pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, relo_idx, err == 0 ? 
"non-matching" : "matching", i); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec); libbpf_print(LIBBPF_DEBUG, "\n"); if (err == 0) continue; - err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res); if (err) return err; if (j == 0) { targ_res = cand_res; - targ_spec = cand_spec; - } else if (cand_spec.bit_offset != targ_spec.bit_offset) { + *targ_spec = *cand_spec; + } else if (cand_spec->bit_offset != targ_spec->bit_offset) { /* if there are many field relo candidates, they * should all resolve to the same bit offset */ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec.bit_offset, - targ_spec.bit_offset); + prog_name, relo_idx, cand_spec->bit_offset, + targ_spec->bit_offset); return -EINVAL; } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { /* all candidates should result in the same relocation @@ -1251,7 +1288,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, } /* - * For BPF_FIELD_EXISTS relo or when used BPF program has field + * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field * existence checks or kernel version/config checks, it's expected * that we might not find any candidates. 
In this case, if field * wasn't found in any candidate, the list of candidates shouldn't @@ -1277,7 +1314,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx); /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); if (err) return err; } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 3b9f8f18346c..17799819ad7c 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -4,81 +4,10 @@ #ifndef __RELO_CORE_H #define __RELO_CORE_H -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. - */ -enum bpf_core_relo_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ - BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ - BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ - BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ - BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ - BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ - BPF_TYPE_SIZE = 9, /* type size in bytes */ - BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ - BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ -}; - -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * type or field; - * - access_str_off - offset into corresponding .BTF string section. 
String - * interpretation depends on specific relocation kind: - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - * Example to provide a better feel. - * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. 
- * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_core_relo_kind kind; -}; +#include <linux/bpf.h> struct bpf_core_cand { const struct btf *btf; - const struct btf_type *t; - const char *name; __u32 id; }; @@ -88,11 +17,39 @@ struct bpf_core_cand_list { int len; }; +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* original unresolved (no skip_mods_or_typedefs) root type ID */ + __u32 root_type_id; + /* CO-RE relocation kind */ + enum bpf_core_relo_kind relo_kind; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field bit offset represented by spec */ + __u32 bit_offset; +}; + int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, int insn_idx, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, - struct bpf_core_cand_list *cands); + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch); int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 9cf66702fa8d..0b84d8e6b72a 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -7,6 +7,16 @@ #include <sys/syscall.h> #include <sys/mman.h> +#ifndef __NR_bpf +# if defined(__mips__) && defined(_ABIO32) +# define __NR_bpf 
4355 +# elif defined(__mips__) && defined(_ABIN32) +# define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +# define __NR_bpf 5315 +# endif +#endif + /* This file is a base header for auto-generated *.lskel.h files. * Its contents will change and may become part of auto-generation in the future. * @@ -65,8 +75,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, - opts->data_sz, 1, 0); + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL); if (map_fd < 0) { opts->errstr = "failed to create loader map"; err = -errno; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 81f8fbc85e70..edafe56664f3 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -35,6 +35,11 @@ #include "libbpf_internal.h" #include "xsk.h" +/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal + * uses of deprecated APIs + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #ifndef SOL_XDP #define SOL_XDP 283 #endif @@ -364,8 +369,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, static enum xsk_prog get_xsk_prog(void) { enum xsk_prog detected = XSK_PROG_FALLBACK; - struct bpf_load_program_attr prog_attr; - struct bpf_create_map_attr map_attr; __u32 size_out, retval, duration; char data_in = 0, data_out; struct bpf_insn insns[] = { @@ -375,27 +378,15 @@ static enum xsk_prog get_xsk_prog(void) BPF_EMIT_CALL(BPF_FUNC_redirect_map), BPF_EXIT_INSN(), }; - int prog_fd, map_fd, ret; - - memset(&map_attr, 0, sizeof(map_attr)); - map_attr.map_type = BPF_MAP_TYPE_XSKMAP; - map_attr.key_size = sizeof(int); - map_attr.value_size = sizeof(int); - map_attr.max_entries = 1; + int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); - map_fd = bpf_create_map_xattr(&map_attr); + map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, 
sizeof(int), sizeof(int), 1, NULL); if (map_fd < 0) return detected; insns[0].imm = map_fd; - memset(&prog_attr, 0, sizeof(prog_attr)); - prog_attr.prog_type = BPF_PROG_TYPE_XDP; - prog_attr.insns = insns; - prog_attr.insns_cnt = ARRAY_SIZE(insns); - prog_attr.license = "GPL"; - - prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); if (prog_fd < 0) { close(map_fd); return detected; @@ -495,10 +486,13 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) }; struct bpf_insn *progs[] = {prog, prog_redirect_flags}; enum xsk_prog option = get_xsk_prog(); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_buf = log_buf, + .log_size = log_buf_size, + ); - prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], - "LGPL-2.1 or BSD-2-Clause", 0, log_buf, - log_buf_size); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", + progs[option], insns_cnt[option], &opts); if (prog_fd < 0) { pr_warn("BPF log buffer:\n%s", log_buf); return prog_fd; @@ -554,8 +548,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) return -errno; ifr.ifr_data = (void *)&channels; - memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); - ifr.ifr_name[IFNAMSIZ - 1] = '\0'; + libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; @@ -590,8 +583,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk) if (max_queues < 0) return max_queues; - fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", - sizeof(int), sizeof(int), max_queues, 0); + fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, NULL); if (fd < 0) return fd; @@ -725,14 +718,12 @@ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) static bool xsk_probe_bpf_link(void) { - DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, - .flags = XDP_FLAGS_SKB_MODE); - struct 
bpf_load_program_attr prog_attr; + LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); struct bpf_insn insns[2] = { BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), BPF_EXIT_INSN() }; - int prog_fd, link_fd = -1; + int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); int ifindex_lo = 1; bool ret = false; int err; @@ -744,13 +735,7 @@ static bool xsk_probe_bpf_link(void) if (link_fd >= 0) return true; - memset(&prog_attr, 0, sizeof(prog_attr)); - prog_attr.prog_type = BPF_PROG_TYPE_XDP; - prog_attr.insns = insns; - prog_attr.insns_cnt = ARRAY_SIZE(insns); - prog_attr.license = "GPL"; - - prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); if (prog_fd < 0) return ret; @@ -782,8 +767,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) } ctx->ifindex = ifindex; - memcpy(ctx->ifname, ifname, IFNAMSIZ -1); - ctx->ifname[IFNAMSIZ - 1] = 0; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); xsk->ctx = ctx; xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); @@ -965,8 +949,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, ctx->refcount = 1; ctx->umem = umem; ctx->queue_id = queue_id; - memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); - ctx->ifname[IFNAMSIZ - 1] = '\0'; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); ctx->fill = fill; ctx->comp = comp; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 63ae5e0195ce..32c5051c24eb 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -48,6 +48,7 @@ SYNOPSIS int perf_cpu_map__nr(const struct perf_cpu_map *cpus); bool perf_cpu_map__empty(const struct perf_cpu_map *map); int perf_cpu_map__max(struct perf_cpu_map *map); + bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) -- @@ -135,16 +136,16 @@ SYNOPSIS int perf_evsel__open(struct perf_evsel 
*evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel); - void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); + void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__mmap(struct perf_evsel *evsel, int pages); void perf_evsel__munmap(struct perf_evsel *evsel); - void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); - int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); + int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); int perf_evsel__enable(struct perf_evsel *evsel); - int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx); int perf_evsel__disable(struct perf_evsel *evsel); - int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index adaad3dddf6e..ee66760f1e63 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -10,15 +10,24 @@ #include <ctype.h> #include <limits.h> -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus) { - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); + struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus); if (cpus != NULL) { - cpus->nr = 1; - cpus->map[0] = -1; + cpus->nr = nr_cpus; refcount_set(&cpus->refcnt, 1); + } + return cpus; +} + +struct perf_cpu_map *perf_cpu_map__dummy_new(void) +{ + struct 
perf_cpu_map *cpus = perf_cpu_map__alloc(1); + + if (cpus) + cpus->map[0].cpu = -1; return cpus; } @@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void) if (nr_cpus < 0) return NULL; - cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); + cpus = perf_cpu_map__alloc(nr_cpus); if (cpus != NULL) { int i; for (i = 0; i < nr_cpus; ++i) - cpus->map[i] = i; - - cpus->nr = nr_cpus; - refcount_set(&cpus->refcnt, 1); + cpus->map[i].cpu = i; } return cpus; @@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void) return cpu_map__default_new(); } -static int cmp_int(const void *a, const void *b) + +static int cmp_cpu(const void *a, const void *b) { - return *(const int *)a - *(const int*)b; + const struct perf_cpu *cpu_a = a, *cpu_b = b; + + return cpu_a->cpu - cpu_b->cpu; } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus) { - size_t payload_size = nr_cpus * sizeof(int); - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); + size_t payload_size = nr_cpus * sizeof(struct perf_cpu); + struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus); int i, j; if (cpus != NULL) { memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); + qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu); /* Remove dups */ j = 0; for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i] != cpus->map[i - 1]) - cpus->map[j++] = cpus->map[i]; + if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) + cpus->map[j++].cpu = cpus->map[i].cpu; } cpus->nr = j; assert(j <= nr_cpus); - refcount_set(&cpus->refcnt, 1); } - return cpus; } @@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) { struct perf_cpu_map *cpus = NULL; int nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; int n, cpu, prev; char sep; @@ -124,24 +131,24 
@@ struct perf_cpu_map *perf_cpu_map__read(FILE *file) if (new_max >= max_entries) { max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } while (++prev < cpu) - tmp_cpus[nr_cpus++] = prev; + tmp_cpus[nr_cpus++].cpu = prev; } if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto out_free_tmp; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = cpu; + tmp_cpus[nr_cpus++].cpu = cpu; if (n == 2 && sep == '-') prev = cpu; else @@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) unsigned long start_cpu, end_cpu = 0; char *p = NULL; int i, nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; + struct perf_cpu *tmp_cpus = NULL, *tmp; int max_entries = 0; if (!cpu_list) @@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) for (; start_cpu <= end_cpu; start_cpu++) { /* check for duplicates */ for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i] == (int)start_cpu) + if (tmp_cpus[i].cpu == (int)start_cpu) goto invalid; if (nr_cpus == max_entries) { max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); + tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu)); if (tmp == NULL) goto invalid; tmp_cpus = tmp; } - tmp_cpus[nr_cpus++] = (int)start_cpu; + tmp_cpus[nr_cpus++].cpu = (int)start_cpu; } if (*p) ++p; @@ -250,12 +257,16 @@ out: return cpus; } -int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) { + struct perf_cpu result = { + .cpu = -1 + }; + if (cpus && idx < cpus->nr) return cpus->map[idx]; - return -1; + return result; } int perf_cpu_map__nr(const struct perf_cpu_map *cpus) @@ -265,21 +276,26 @@ int 
perf_cpu_map__nr(const struct perf_cpu_map *cpus) bool perf_cpu_map__empty(const struct perf_cpu_map *map) { - return map ? map->map[0] == -1 : true; + return map ? map->map[0].cpu == -1 : true; } -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { - int low = 0, high = cpus->nr; + int low, high; + if (!cpus) + return -1; + + low = 0; + high = cpus->nr; while (low < high) { - int idx = (low + high) / 2, - cpu_at_idx = cpus->map[idx]; + int idx = (low + high) / 2; + struct perf_cpu cpu_at_idx = cpus->map[idx]; - if (cpu_at_idx == cpu) + if (cpu_at_idx.cpu == cpu.cpu) return idx; - if (cpu_at_idx > cpu) + if (cpu_at_idx.cpu > cpu.cpu) high = idx; else low = idx + 1; @@ -288,10 +304,19 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) return -1; } -int perf_cpu_map__max(struct perf_cpu_map *map) +bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu) { + return perf_cpu_map__idx(cpus, cpu) != -1; +} + +struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) +{ + struct perf_cpu result = { + .cpu = -1 + }; + // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. - return map->nr > 0 ? map->map[map->nr - 1] : -1; + return map->nr > 0 ? 
map->map[map->nr - 1] : result; } /* @@ -305,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map) struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other) { - int *tmp_cpus; + struct perf_cpu *tmp_cpus; int tmp_len; int i, j, k; struct perf_cpu_map *merged; @@ -319,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, if (!other) return orig; if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(int))) + !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu))) return orig; tmp_len = orig->nr + other->nr; - tmp_cpus = malloc(tmp_len * sizeof(int)); + tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu)); if (!tmp_cpus) return NULL; /* Standard merge algorithm from wikipedia */ i = j = k = 0; while (i < orig->nr && j < other->nr) { - if (orig->map[i] <= other->map[j]) { - if (orig->map[i] == other->map[j]) + if (orig->map[i].cpu <= other->map[j].cpu) { + if (orig->map[i].cpu == other->map[j].cpu) j++; tmp_cpus[k++] = orig->map[i++]; } else diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index e37dfad31383..9a770bfdc804 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int output, int cpu) + int output, struct perf_cpu cpu) { return perf_mmap__mmap(map, mp, output, cpu); } @@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, int idx, struct perf_mmap_param *mp, int cpu_idx, int thread, int *_output, int *_output_overwrite) { - int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); + struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); struct perf_evsel *evsel; int revent; @@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, return 
overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; } -void __perf_evlist__set_leader(struct list_head *list) +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader) { - struct perf_evsel *evsel, *leader; + struct perf_evsel *first, *last, *evsel; - leader = list_entry(list->next, struct perf_evsel, node); - evsel = list_entry(list->prev, struct perf_evsel, node); + first = list_first_entry(list, struct perf_evsel, node); + last = list_last_entry(list, struct perf_evsel, node); - leader->nr_members = evsel->idx - leader->idx + 1; + leader->nr_members = last->idx - first->idx + 1; __perf_evlist__for_each_entry(list, evsel) evsel->leader = leader; @@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list) void perf_evlist__set_leader(struct perf_evlist *evlist) { if (evlist->nr_entries) { + struct perf_evsel *first = list_entry(evlist->entries.next, + struct perf_evsel, node); + evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; - __perf_evlist__set_leader(&evlist->entries); + __perf_evlist__set_leader(&evlist->entries, first); } } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8441e3e1aaac..7ea86a44eae5 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) -#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) +#define FD(_evsel, _cpu_map_idx, _thread) \ + ((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread)) +#define MMAP(_evsel, _cpu_map_idx, _thread) \ + (_evsel->mmap ? 
((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \ + : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); if (evsel->fd) { - int cpu, thread; - for (cpu = 0; cpu < ncpus; cpu++) { + int idx, thread; + + for (idx = 0; idx < ncpus; idx++) { for (thread = 0; thread < nthreads; thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd) *fd = -1; @@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre static int sys_perf_event_open(struct perf_event_attr *attr, - pid_t pid, int cpu, int group_fd, + pid_t pid, struct perf_cpu cpu, int group_fd, unsigned long flags) { - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); + return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags); } -static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) +static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; int *fd; @@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou if (!leader->fd) return -ENOTCONN; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); if (fd == NULL || *fd == -1) return -EBADF; @@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - int cpu, thread, err = 0; + struct perf_cpu cpu; + int idx, thread, err = 0; if (cpus == NULL) { static struct perf_cpu_map *empty_cpu_map; @@ -139,21 +144,21 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) return -ENOMEM; - for (cpu = 0; cpu < cpus->nr; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, 
cpus) { for (thread = 0; thread < threads->nr; thread++) { int fd, group_fd, *evsel_fd; - evsel_fd = FD(evsel, cpu, thread); + evsel_fd = FD(evsel, idx, thread); if (evsel_fd == NULL) return -EINVAL; - err = get_group_fd(evsel, cpu, thread, &group_fd); + err = get_group_fd(evsel, idx, thread, &group_fd); if (err < 0) return err; fd = sys_perf_event_open(&evsel->attr, threads->map[thread].pid, - cpus->map[cpu], group_fd, 0); + cpu, group_fd, 0); if (fd < 0) return -errno; @@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, return err; } -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) +static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd && *fd >= 0) { close(*fd); @@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) void perf_evsel__close_fd(struct perf_evsel *evsel) { - int cpu; - - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) - perf_evsel__close_fd_cpu(evsel, cpu); + for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++) + perf_evsel__close_fd_cpu(evsel, idx); } void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel) perf_evsel__free_fd(evsel); } -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx) { if (evsel->fd == NULL) return; - perf_evsel__close_fd_cpu(evsel, cpu); + perf_evsel__close_fd_cpu(evsel, cpu_map_idx); } void perf_evsel__munmap(struct perf_evsel *evsel) { - int cpu, thread; + int idx, thread; if (evsel->fd == NULL || evsel->mmap == NULL) return; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < 
xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(MMAP(evsel, cpu, thread)); + perf_mmap__munmap(MMAP(evsel, idx, thread)); } } @@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel) int perf_evsel__mmap(struct perf_evsel *evsel, int pages) { - int ret, cpu, thread; + int ret, idx, thread; struct perf_mmap_param mp = { .prot = PROT_READ | PROT_WRITE, .mask = (pages * page_size) - 1, @@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0) return -ENOMEM; - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, idx, thread); struct perf_mmap *map; + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx); if (fd == NULL || *fd < 0) continue; - map = MMAP(evsel, cpu, thread); + map = MMAP(evsel, idx, thread); perf_mmap__init(map, NULL, false, NULL); ret = perf_mmap__mmap(map, &mp, *fd, cpu); @@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) return 0; } -void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread) { - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); - if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL) return NULL; - return MMAP(evsel, cpu, thread)->base; + return MMAP(evsel, cpu_map_idx, thread)->base; } int perf_evsel__read_size(struct perf_evsel *evsel) @@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel) return size; } -int perf_evsel__read(struct perf_evsel *evsel, 
int cpu, int thread, +int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); memset(count, 0, sizeof(*count)); if (fd == NULL || *fd < 0) return -EINVAL; - if (MMAP(evsel, cpu, thread) && - !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) + if (MMAP(evsel, cpu_map_idx, thread) && + !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count)) return 0; if (readn(*fd, count->values, size) <= 0) @@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ioc, void *arg, - int cpu) + int cpu_map_idx) { int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { int err; - int *fd = FD(evsel, cpu, thread); + int *fd = FD(evsel, cpu_map_idx, thread); if (fd == NULL || *fd < 0) return -1; @@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, return 0; } -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx); } int perf_evsel__enable(struct perf_evsel *evsel) @@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel) return err; } -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx) { - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); + return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx); } int perf_evsel__disable(struct perf_evsel *evsel) @@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel) zfree(&evsel->id); evsel->ids = 0; } + +void 
perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled) +{ + s8 scaled = 0; + + if (scale) { + if (count->run == 0) { + scaled = -1; + count->val = 0; + } else if (count->run < count->ena) { + scaled = 1; + count->val = (u64)((double)count->val * count->ena / count->run); + } + } + + if (pscaled) + *pscaled = scaled; +} diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b..581f9ffb4237 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -4,16 +4,30 @@ #include <linux/refcount.h> +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { + int cpu; +}; + +/** + * A sized, reference counted, sorted array of integers representing CPU + * numbers. This is commonly used to capture which CPUs a PMU is associated + * with. The indices into the cpumap are frequently used as they avoid having + * gaps if CPU numbers were used. For events associated with a pid, rather than + * a CPU, a single dummy map with an entry of -1 is used. + */ struct perf_cpu_map { refcount_t refcnt; + /** Length of the map array. */ int nr; - int map[]; + /** The CPU values. 
*/ + struct perf_cpu map[]; }; #ifndef MAX_NR_CPUS #define MAX_NR_CPUS 2048 #endif -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu); #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index f366dbad6a88..4cefade540bd 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -4,6 +4,7 @@ #include <linux/list.h> #include <api/fd/array.h> +#include <internal/cpumap.h> #include <internal/evsel.h> #define PERF_EVLIST__HLIST_BITS 8 @@ -36,7 +37,7 @@ typedef void typedef struct perf_mmap* (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu); struct perf_evlist_mmap_ops { perf_evlist_mmap__cb_idx_t idx; @@ -127,5 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -void __perf_evlist__set_leader(struct list_head *list); +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1f3eacbad2e8..cfc9ebd7968e 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -6,8 +6,8 @@ #include <linux/perf_event.h> #include <stdbool.h> #include <sys/types.h> +#include <internal/cpumap.h> -struct perf_cpu_map; struct perf_thread_map; struct xyarray; @@ -27,7 +27,7 @@ struct perf_sample_id { * queue number. */ int idx; - int cpu; + struct perf_cpu cpu; pid_t tid; /* Holds total ID period value for PERF_SAMPLE_READ processing. 
*/ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5e3422f40ed5..5a062af8e9d8 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -6,6 +6,7 @@ #include <linux/refcount.h> #include <linux/types.h> #include <stdbool.h> +#include <internal/cpumap.h> /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) @@ -24,7 +25,7 @@ struct perf_mmap { void *base; int mask; int fd; - int cpu; + struct perf_cpu cpu; refcount_t refcnt; u64 prev; u64 start; @@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map); void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, bool overwrite, libperf_unmap_cb_t unmap_cb); int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu); + int fd, struct perf_cpu cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); void perf_mmap__put(struct perf_mmap *map); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 7c27766ea0bf..15b8faafd615 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,11 +3,10 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> +#include <perf/cpumap.h> #include <stdio.h> #include <stdbool.h> -struct perf_cpu_map; - LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); @@ -16,10 +15,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, struct perf_cpu_map *other); LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct 
perf_cpu_map *cpus, int idx); LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu); #define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 60eae25076d3..2a9516b42d15 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -4,6 +4,8 @@ #include <stdint.h> #include <perf/core.h> +#include <stdbool.h> +#include <linux/types.h> struct perf_evsel; struct perf_event_attr; @@ -26,18 +28,20 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages); LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); -LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); +LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__enable_cpu(struct 
perf_evsel *evsel, int cpu_map_idx); LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx); LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); +LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count, + bool scale, __s8 *pscaled); #endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 71468606e8a7..93696affda2e 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -10,6 +10,7 @@ LIBPERF_0.0.1 { perf_cpu_map__cpu; perf_cpu_map__empty; perf_cpu_map__max; + perf_cpu_map__has; perf_thread_map__new_dummy; perf_thread_map__set_pid; perf_thread_map__comm; @@ -50,6 +51,7 @@ LIBPERF_0.0.1 { perf_mmap__read_init; perf_mmap__read_done; perf_mmap__read_event; + perf_counts_values__scale; local: *; }; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index c89dfa5f67b3..f7ee07cb5818 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -32,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu) + int fd, struct perf_cpu cpu) { map->prev = 0; map->mask = mp->mask; @@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count count->ena += delta; if (idx) count->run += delta; - - cnt = mul_u64_u64_div64(cnt, count->ena, count->run); } count->val = cnt; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ce91a582f0e4..b3479dfa9a1c 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -21,6 +21,9 @@ #include 
"tests.h" #include <internal/evsel.h> +#define EVENT_NUM 15 +#define WAIT_COUNT 100000000UL + static int libperf_print(enum libperf_print_level level, const char *fmt, va_list ap) { @@ -331,7 +334,8 @@ static int test_mmap_cpus(void) }; cpu_set_t saved_mask; char path[PATH_MAX]; - int id, err, cpu, tmp; + int id, err, tmp; + struct perf_cpu cpu; union perf_event *event; int count = 0; @@ -374,7 +378,7 @@ static int test_mmap_cpus(void) cpu_set_t mask; CPU_ZERO(&mask); - CPU_SET(cpu, &mask); + CPU_SET(cpu.cpu, &mask); err = sched_setaffinity(0, sizeof(mask), &mask); __T("sched_setaffinity failed", err == 0); @@ -413,6 +417,159 @@ static int test_mmap_cpus(void) return 0; } +static double display_error(long long average, + long long high, + long long low, + long long expected) +{ + double error; + + error = (((double)average - expected) / expected) * 100.0; + + __T_VERBOSE(" Expected: %lld\n", expected); + __T_VERBOSE(" High: %lld Low: %lld Average: %lld\n", + high, low, average); + + __T_VERBOSE(" Average Error = %.2f%%\n", error); + + return error; +} + +static int test_stat_multiplexing(void) +{ + struct perf_counts_values expected_counts = { .val = 0 }; + struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; + struct perf_thread_map *threads; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING, + .disabled = 1, + }; + int err, i, nonzero = 0; + unsigned long count; + long long max = 0, min = 0, avg = 0; + double error = 0.0; + s8 scaled = 0; + + /* read for non-multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + err = perf_evsel__open(evsel, NULL, threads); + __T("failed to open 
evsel", err == 0); + + err = perf_evsel__enable(evsel); + __T("failed to enable evsel", err == 0); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + perf_evsel__read(evsel, 0, 0, &expected_counts); + __T("failed to read value for evsel", expected_counts.val != 0); + __T("failed to read non-multiplexing event count", + expected_counts.ena == expected_counts.run); + + err = perf_evsel__disable(evsel); + __T("failed to enable evsel", err == 0); + + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + perf_thread_map__put(threads); + + /* read for multiplexing event count */ + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evlist = perf_evlist__new(); + __T("failed to create evlist", evlist); + + for (i = 0; i < EVENT_NUM; i++) { + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + perf_evlist__add(evlist, evsel); + } + perf_evlist__set_maps(evlist, NULL, threads); + + err = perf_evlist__open(evlist); + __T("failed to open evsel", err == 0); + + perf_evlist__enable(evlist); + + /* wait loop */ + count = WAIT_COUNT; + while (count--) + ; + + i = 0; + perf_evlist__for_each_evsel(evlist, evsel) { + perf_evsel__read(evsel, 0, 0, &counts[i]); + __T("failed to read value for evsel", counts[i].val != 0); + i++; + } + + perf_evlist__disable(evlist); + + min = counts[0].val; + for (i = 0; i < EVENT_NUM; i++) { + __T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n", + i, counts[i].val, counts[i].run, counts[i].ena); + + perf_counts_values__scale(&counts[i], true, &scaled); + if (scaled == 1) { + __T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n", + counts[i].val, + (double)counts[i].run / (double)counts[i].ena * 100.0, + counts[i].run, counts[i].ena); + } else if (scaled == -1) { + __T_VERBOSE("\t Not Running\n"); + } else { + __T_VERBOSE("\t Not Scaling\n"); + } + + if (counts[i].val > max) + max = counts[i].val; + + if 
(counts[i].val < min) + min = counts[i].val; + + avg += counts[i].val; + + if (counts[i].val != 0) + nonzero++; + } + + if (nonzero != 0) + avg = avg / nonzero; + else + avg = 0; + + error = display_error(avg, max, min, expected_counts.val); + + __T("Error out of range!", ((error <= 1.0) && (error >= -1.0))); + + perf_evlist__close(evlist); + perf_evlist__delete(evlist); + + perf_thread_map__put(threads); + + return 0; +} + int test_evlist(int argc, char **argv) { __T_START; @@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv) test_stat_thread_enable(); test_mmap_thread(); test_mmap_cpus(); + test_stat_multiplexing(); __T_END; return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fe58843d047c..8e24c4c78c7f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field) return 0; } +static int field_is_relative_dynamic(struct tep_format_field *field) +{ + if (strncmp(field->type, "__rel_loc", 9) == 0) + return 1; + + return 0; +} + static int field_is_long(struct tep_format_field *field) { /* includes long long */ @@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** field->flags |= TEP_FIELD_IS_STRING; if (field_is_dynamic(field)) field->flags |= TEP_FIELD_IS_DYNAMIC; + if (field_is_relative_dynamic(field)) + field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE; if (field_is_long(field)) field->flags |= TEP_FIELD_IS_LONG; @@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, arg->type = TEP_PRINT_STRING; arg->string.string = token; - arg->string.offset = -1; + arg->string.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar arg->type = TEP_PRINT_BITMASK; 
arg->bitmask.bitmask = token; - arg->bitmask.offset = -1; + arg->bitmask.field = NULL; if (read_expected(TEP_EVENT_DELIM, ")") < 0) goto out_err; @@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, free_token(token); return process_int_array(event, arg, tok); } - if (strcmp(token, "__get_str") == 0) { + if (strcmp(token, "__get_str") == 0 || + strcmp(token, "__get_rel_str") == 0) { free_token(token); return process_str(event, arg, tok); } - if (strcmp(token, "__get_bitmask") == 0) { + if (strcmp(token, "__get_bitmask") == 0 || + strcmp(token, "__get_rel_bitmask") == 0) { free_token(token); return process_bitmask(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array") == 0) { + if (strcmp(token, "__get_dynamic_array") == 0 || + strcmp(token, "__get_rel_dynamic_array") == 0) { free_token(token); return process_dynamic_array(event, arg, tok); } - if (strcmp(token, "__get_dynamic_array_len") == 0) { + if (strcmp(token, "__get_dynamic_array_len") == 0 || + strcmp(token, "__get_rel_dynamic_array_len") == 0) { free_token(token); return process_dynamic_array_len(event, arg, tok); } @@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_STRING: { int str_offset; - if (arg->string.offset == -1) { - struct tep_format_field *f; + if (!arg->string.field) + arg->string.field = tep_find_any_field(event, arg->string.string); + if (!arg->string.field) + break; - f = tep_find_any_field(event, arg->string.string); - arg->string.offset = f->offset; - } - str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); + str_offset = data2host4(tep, + *(unsigned int *)(data + arg->string.field->offset)); str_offset &= 0xffff; + if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) + str_offset += arg->string.field->offset + arg->string.field->size; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; } @@ -4181,15 +4197,16 @@ static void 
print_str_arg(struct trace_seq *s, void *data, int size, int bitmask_offset; int bitmask_size; - if (arg->bitmask.offset == -1) { - struct tep_format_field *f; - - f = tep_find_any_field(event, arg->bitmask.bitmask); - arg->bitmask.offset = f->offset; - } - bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); + if (!arg->bitmask.field) + arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); + if (!arg->bitmask.field) + break; + bitmask_offset = data2host4(tep, + *(unsigned int *)(data + arg->bitmask.field->offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; + if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) + bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size; print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; @@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, data + offset, field->size); *len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else *len = field->size; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a67ad9a5b835..41d4f9f6a843 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -125,6 +125,7 @@ enum tep_format_flags { TEP_FIELD_IS_LONG = 32, TEP_FIELD_IS_FLAG = 64, TEP_FIELD_IS_SYMBOLIC = 128, + TEP_FIELD_IS_RELATIVE = 256, }; struct tep_format_field { @@ -153,12 +154,12 @@ struct tep_print_arg_atom { struct tep_print_arg_string { char *string; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_bitmask { char 
*bitmask; - int offset; + struct tep_format_field *field; }; struct tep_print_arg_field { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 368826bb5a57..5df177070d53 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) { addr = *(unsigned int *)val; - val = record->data + (addr & 0xffff); size = addr >> 16; + addr &= 0xffff; + if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) + addr += arg->str.field->offset + arg->str.field->size; + val = record->data + addr; } /* diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 5d72f3112e56..394ee57d58f2 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -1813,15 +1813,16 @@ spin_trylock() -- we can call these things lock-releases and lock-acquires -- have two properties beyond those of ordinary releases and acquires. -First, when a lock-acquire reads from a lock-release, the LKMM -requires that every instruction po-before the lock-release must -execute before any instruction po-after the lock-acquire. This would -naturally hold if the release and acquire operations were on different -CPUs, but the LKMM says it holds even when they are on the same CPU. -For example: +First, when a lock-acquire reads from or is po-after a lock-release, +the LKMM requires that every instruction po-before the lock-release +must execute before any instruction po-after the lock-acquire. This +would naturally hold if the release and acquire operations were on +different CPUs and accessed the same lock variable, but the LKMM says +it also holds when they are on the same CPU, even if they access +different lock variables. 
For example: int x, y; - spinlock_t s; + spinlock_t s, t; P0() { @@ -1830,9 +1831,9 @@ For example: spin_lock(&s); r1 = READ_ONCE(x); spin_unlock(&s); - spin_lock(&s); + spin_lock(&t); r2 = READ_ONCE(y); - spin_unlock(&s); + spin_unlock(&t); } P1() @@ -1842,10 +1843,10 @@ For example: WRITE_ONCE(x, 1); } -Here the second spin_lock() reads from the first spin_unlock(), and -therefore the load of x must execute before the load of y. Thus we -cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the -MP pattern). +Here the second spin_lock() is po-after the first spin_unlock(), and +therefore the load of x must execute before the load of y, even though +the two locking operations use different locks. Thus we cannot have +r1 = 1 and r2 = 0 at the end (this is an instance of the MP pattern). This requirement does not apply to ordinary release and acquire fences, only to lock-related operations. For instance, suppose P0() @@ -1872,13 +1873,13 @@ instructions in the following order: and thus it could load y before x, obtaining r2 = 0 and r1 = 1. -Second, when a lock-acquire reads from a lock-release, and some other -stores W and W' occur po-before the lock-release and po-after the -lock-acquire respectively, the LKMM requires that W must propagate to -each CPU before W' does. For example, consider: +Second, when a lock-acquire reads from or is po-after a lock-release, +and some other stores W and W' occur po-before the lock-release and +po-after the lock-acquire respectively, the LKMM requires that W must +propagate to each CPU before W' does. For example, consider: int x, y; - spinlock_t x; + spinlock_t s; P0() { @@ -1908,7 +1909,12 @@ each CPU before W' does. For example, consider: If r1 = 1 at the end then the spin_lock() in P1 must have read from the spin_unlock() in P0. Hence the store to x must propagate to P2 -before the store to y does, so we cannot have r2 = 1 and r3 = 0. +before the store to y does, so we cannot have r2 = 1 and r3 = 0. 
But +if P1 had used a lock variable different from s, the writes could have +propagated in either order. (On the other hand, if the code in P0 and +P1 had all executed on a single CPU, as in the example before this +one, then the writes would have propagated in order even if the two +critical sections used different lock variables.) These two special requirements for lock-release and lock-acquire do not arise from the operational model. Nevertheless, kernel developers diff --git a/tools/memory-model/README b/tools/memory-model/README index 9a84c45504ab..9edd402704c4 100644 --- a/tools/memory-model/README +++ b/tools/memory-model/README @@ -195,6 +195,18 @@ litmus-tests are listed in litmus-tests/README. A great deal more litmus tests are available at https://github.com/paulmckrcu/litmus. + By "representative", it means the one in the litmus-tests + directory is: + + 1) simple, the number of threads should be relatively + small and each thread function should be relatively + simple. + 2) orthogonal, there should be no two litmus tests + describing the same aspect of the memory model. + 3) textbook, developers can easily copy-paste-modify + the litmus tests to use the patterns on their own + code. 
+ lock.cat Provides a front-end analysis of lock acquisition and release, for example, associating a lock acquisition with the preceding diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index 2a9b4fe4a84e..d70315fddef6 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -27,7 +27,7 @@ include "lock.cat" (* Release Acquire *) let acq-po = [Acquire] ; po ; [M] let po-rel = [M] ; po ; [Release] -let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po +let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po (* Fences *) let R4rmb = R \ Noreturn (* Reads for which rmb works *) @@ -70,12 +70,12 @@ let rwdep = (dep | ctrl) ; [W] let overwrite = co | fr let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb) let to-r = addr | (dep ; [Marked] ; rfi) -let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int) +let ppo = to-r | to-w | fence | (po-unlock-lock-po & int) (* Propagation: Ordering from release operations and strong fences. *) let A-cumul(r) = (rfe ; [Marked])? ; r let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb | - po-unlock-rf-lock-po) ; [Marked] + po-unlock-lock-po) ; [Marked] let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ; [Marked] ; rfe? ; [Marked] diff --git a/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus new file mode 100644 index 000000000000..eb34123a6ffe --- /dev/null +++ b/tools/memory-model/litmus-tests/LB+unlocklockonceonce+poacquireonce.litmus @@ -0,0 +1,35 @@ +C LB+unlocklockonceonce+poacquireonce + +(* + * Result: Never + * + * If two locked critical sections execute on the same CPU, all accesses + * in the first must execute before any accesses in the second, even if the + * critical sections are protected by different locks. 
Note: Even when a + * write executes before a read, their memory effects can be reordered from + * the viewpoint of another CPU (the kind of reordering allowed by TSO). + *) + +{} + +P0(spinlock_t *s, spinlock_t *t, int *x, int *y) +{ + int r1; + + spin_lock(s); + r1 = READ_ONCE(*x); + spin_unlock(s); + spin_lock(t); + WRITE_ONCE(*y, 1); + spin_unlock(t); +} + +P1(int *x, int *y) +{ + int r2; + + r2 = smp_load_acquire(y); + WRITE_ONCE(*x, 1); +} + +exists (0:r1=1 /\ 1:r2=1) diff --git a/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus new file mode 100644 index 000000000000..2feb1398be71 --- /dev/null +++ b/tools/memory-model/litmus-tests/MP+unlocklockonceonce+fencermbonceonce.litmus @@ -0,0 +1,33 @@ +C MP+unlocklockonceonce+fencermbonceonce + +(* + * Result: Never + * + * If two locked critical sections execute on the same CPU, stores in the + * first must propagate to each CPU before stores in the second do, even if + * the critical sections are protected by different locks. + *) + +{} + +P0(spinlock_t *s, spinlock_t *t, int *x, int *y) +{ + spin_lock(s); + WRITE_ONCE(*x, 1); + spin_unlock(s); + spin_lock(t); + WRITE_ONCE(*y, 1); + spin_unlock(t); +} + +P1(int *x, int *y) +{ + int r1; + int r2; + + r1 = READ_ONCE(*y); + smp_rmb(); + r2 = READ_ONCE(*x); +} + +exists (1:r1=1 /\ 1:r2=0) diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README index 681f9067fa9e..d311a0ff1ae6 100644 --- a/tools/memory-model/litmus-tests/README +++ b/tools/memory-model/litmus-tests/README @@ -63,6 +63,10 @@ LB+poonceonces.litmus As above, but with store-release replaced with WRITE_ONCE() and load-acquire replaced with READ_ONCE(). +LB+unlocklockonceonce+poacquireonce.litmus + Does a unlock+lock pair provides ordering guarantee between a + load and a store? 
+ MP+onceassign+derefonce.litmus As below, but with rcu_assign_pointer() and an rcu_dereference(). @@ -90,6 +94,10 @@ MP+porevlocks.litmus As below, but with the first access of the writer process and the second access of reader process protected by a lock. +MP+unlocklockonceonce+fencermbonceonce.litmus + Does a unlock+lock pair provides ordering guarantee between a + store and another store? + MP+fencewmbonceonce+fencermbonceonce.litmus Does a smp_wmb() (between the stores) and an smp_rmb() (between the loads) suffice for the message-passing litmus test, where one diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 4d6d7fc13255..c10ef78df050 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -531,6 +531,11 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } break; + case 0xcc: + /* int3 */ + *type = INSN_TRAP; + break; + case 0xe3: /* jecxz/jrcxz */ *type = INSN_JUMP_CONDITIONAL; @@ -697,10 +702,10 @@ const char *arch_ret_insn(int len) { static const char ret[5][5] = { { BYTE_RET }, - { BYTE_RET, BYTES_NOP1 }, - { BYTE_RET, BYTES_NOP2 }, - { BYTE_RET, BYTES_NOP3 }, - { BYTE_RET, BYTES_NOP4 }, + { BYTE_RET, 0xcc }, + { BYTE_RET, 0xcc, BYTES_NOP1 }, + { BYTE_RET, 0xcc, BYTES_NOP2 }, + { BYTE_RET, 0xcc, BYTES_NOP3 }, }; if (len < 1 || len > 5) { diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 8b38b5d6fec7..38070f26105b 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,7 @@ #include <objtool/objtool.h> bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup; + validate_dup, vmlinux, mcount, noinstr, backup, sls; static const char * const check_usage[] = { "objtool check [<options>] file.o", @@ -45,6 +45,7 @@ const struct option check_options[] = { OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', 
"mcount", &mcount, "generate __mcount_loc"), OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), + OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), OPT_END(), }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 21735829b860..c2d2ab9a2861 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -168,14 +168,16 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "panic", "do_exit", "do_task_dead", - "__module_put_and_exit", - "complete_and_exit", + "kthread_exit", + "make_task_dead", + "__module_put_and_kthread_exit", + "kthread_complete_and_exit", "__reiserfs_panic", "lbug_with_loc", "fortify_panic", "usercopy_abort", "machine_real_restart", - "rewind_stack_do_exit", + "rewind_stack_and_make_dead", "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", @@ -849,6 +851,10 @@ static const char *uaccess_safe_builtin[] = { "__asan_report_store16_noabort", /* KCSAN */ "__kcsan_check_access", + "__kcsan_mb", + "__kcsan_wmb", + "__kcsan_rmb", + "__kcsan_release", "kcsan_found_watchpoint", "kcsan_setup_watchpoint", "kcsan_check_scoped_accesses", @@ -1068,11 +1074,11 @@ static void annotate_call_site(struct objtool_file *file, } /* - * Many compilers cannot disable KCOV with a function attribute - * so they need a little help, NOP out any KCOV calls from noinstr - * text. + * Many compilers cannot disable KCOV or sanitizer calls with a function + * attribute so they need a little help, NOP out any such calls from + * noinstr text. */ - if (insn->sec->noinstr && sym->kcov) { + if (insn->sec->noinstr && sym->profiling_func) { if (reloc) { reloc->type = R_NONE; elf_write_reloc(file->elf, reloc); @@ -1987,6 +1993,31 @@ static int read_intra_function_calls(struct objtool_file *file) return 0; } +/* + * Return true if name matches an instrumentation function, where calls to that + * function from noinstr code can safely be removed, but compilers won't do so. 
+ */ +static bool is_profiling_func(const char *name) +{ + /* + * Many compilers cannot disable KCOV with a function attribute. + */ + if (!strncmp(name, "__sanitizer_cov_", 16)) + return true; + + /* + * Some compilers currently do not remove __tsan_func_entry/exit nor + * __tsan_atomic_signal_fence (used for barrier instrumentation) with + * the __no_sanitize_thread attribute, remove them. Once the kernel's + * minimum Clang version is 14.0, this can be removed. + */ + if (!strncmp(name, "__tsan_func_", 12) || + !strcmp(name, "__tsan_atomic_signal_fence")) + return true; + + return false; +} + static int classify_symbols(struct objtool_file *file) { struct section *sec; @@ -2007,8 +2038,8 @@ static int classify_symbols(struct objtool_file *file) if (!strcmp(func->name, "__fentry__")) func->fentry = true; - if (!strncmp(func->name, "__sanitizer_cov_", 16)) - func->kcov = true; + if (is_profiling_func(func->name)) + func->profiling_func = true; } } @@ -3084,6 +3115,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: + if (next_insn && next_insn->type == INSN_TRAP) { + next_insn->ignore = true; + } else if (sls && !insn->retpoline_safe) { + WARN_FUNC("missing int3 after ret", + insn->sec, insn->offset); + } return validate_return(func, insn, &state); case INSN_CALL: @@ -3127,6 +3164,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: + if (next_insn && next_insn->type == INSN_TRAP) { + next_insn->ignore = true; + } else if (sls && !insn->retpoline_safe) { + WARN_FUNC("missing int3 after indirect jump", + insn->sec, insn->offset); + } + + /* fallthrough */ case INSN_JUMP_DYNAMIC_CONDITIONAL: if (is_sibling_call(insn)) { ret = validate_sibling_call(file, insn, &state); @@ -3296,14 +3341,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio return true; /* - * Ignore any unused exceptions. 
This can happen when a whitelisted - * function has an exception table entry. - * - * Also ignore alternative replacement instructions. This can happen + * Ignore alternative replacement instructions. This can happen * when a whitelisted function uses one of the ALTERNATIVE macros. */ - if (!strcmp(insn->sec->name, ".fixup") || - !strcmp(insn->sec->name, ".altinstr_replacement") || + if (!strcmp(insn->sec->name, ".altinstr_replacement") || !strcmp(insn->sec->name, ".altinstr_aux")) return true; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 589ff58426ab..76bae3078286 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -26,6 +26,7 @@ enum insn_type { INSN_CLAC, INSN_STD, INSN_CLD, + INSN_TRAP, INSN_OTHER, }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 15ac0b7d3d6a..89ba869ed08f 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,7 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup; + validate_dup, vmlinux, mcount, noinstr, backup, sls; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index cdc739fa9a6f..d22336781401 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -58,7 +58,7 @@ struct symbol { u8 static_call_tramp : 1; u8 retpoline_thunk : 1; u8 fentry : 1; - u8 kcov : 1; + u8 profiling_func : 1; struct list_head pv_target; }; diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index cd8ce6e8ec12..7e44b419d301 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ 
b/tools/perf/Documentation/perf-buildid-cache.txt @@ -74,12 +74,15 @@ OPTIONS used when creating a uprobe for a process that resides in a different mount namespace from the perf(1) utility. ---debuginfod=URLs:: +--debuginfod[=URLs]:: Specify debuginfod URL to be used when retrieving perf.data binaries, it follows the same syntax as the DEBUGINFOD_URLS variable, like: buildid-cache.debuginfod=http://192.168.122.174:8002 + If the URLs is not specified, the value of DEBUGINFOD_URLS + system environment variable is used. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1] diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 3bb75c1f25e8..0420e71698ee 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -587,6 +587,15 @@ record.*:: Use 'n' control blocks in asynchronous (Posix AIO) trace writing mode ('n' default: 1, max: 4). + record.debuginfod:: + Specify debuginfod URL to be used when cacheing perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + http://192.168.122.174:8002 + + If the URLs is 'system', the value of DEBUGINFOD_URLS system environment + variable is used. + diff.*:: diff.order:: This option sets the number of columns to sort the result. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 4dc8d0af19df..57384a97c04f 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2). The precise modifier works with event types 0x76 (cpu-cycles, CPU clocks not halted) and 0xC1 (micro-ops retired). 
Both events map to IBS execution sampling (IBS op) with the IBS Op Counter Control bit -(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +(IbsOpCntCtl) set respectively (see the +Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS) +section of the [AMD Processor Programming Reference (PPR)] relevant to the +family, model and stepping of the processor being used). + Manual Volume 2: System Programming, 13.3 Instruction-Based Sampling). Examples to use IBS: @@ -94,10 +98,12 @@ RAW HARDWARE EVENT DESCRIPTOR Even when an event is not available in a symbolic form within perf right now, it can be encoded in a per processor specific way. -For instance For x86 CPUs NNN represents the raw register encoding with the +For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout -of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, -Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). +of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the +Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the +[AMD Processor Programming Reference (PPR)] relevant to the family, model +and stepping of the processor being used). Note: Only the following bit fields can be set in x86 counter registers: event, umask, edge, inv, cmask. Esp. guest/host only and @@ -126,6 +132,38 @@ It's also possible to use pmu syntax: perf record -e cpu/r1a8/ ... perf record -e cpu/r0x1a8/ ... +Some processors, like those from AMD, support event codes and unit masks +larger than a byte. 
In such cases, the bits corresponding to the event +configuration parameters can be seen with: + + cat /sys/bus/event_source/devices/<pmu>/format/<config> + +Example: + +If the AMD docs for an EPYC 7713 processor describe an event as: + + Event Umask Event Mask + Num. Value Mnemonic Description + + 28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag + hit events. + +raw encoding of 0x0328F cannot be used since the upper nibble of the +EventSelect bits have to be specified via bits 32-35 as can be seen with: + + cat /sys/bus/event_source/devices/cpu/format/event + +raw encoding of 0x20000038F should be used instead: + + perf stat -e r20000038f -a sleep 1 + perf record -e r20000038f ... + +It's also possible to use pmu syntax: + + perf record -e r20000038f -a sleep 1 + perf record -e cpu/r20000038f/ ... + perf record -e cpu/r0x20000038f/ ... + You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. @@ -316,4 +354,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] +https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 3cf7bac67239..9ccc75935bc5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -30,8 +30,10 @@ OPTIONS - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. 
+ - a raw PMU event in the form of rN where N is a hexadecimal value + that represents the raw register encoding with the layout of the + event control registers as described by entries in + /sys/bus/event_sources/devices/cpu/format/*. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the @@ -713,6 +715,15 @@ measurements: include::intel-hybrid.txt[] +--debuginfod[=URLs]:: + Specify debuginfod URL to be used when cacheing perf.data binaries, + it follows the same syntax as the DEBUGINFOD_URLS variable, like: + + http://192.168.122.174:8002 + + If the URLs is not specified, the value of DEBUGINFOD_URLS + system environment variable is used. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 7e6fb7cbc0f4..c06c341e72b9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -36,8 +36,10 @@ report:: - a symbolic event name (use 'perf list' to list all events) - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + - a raw PMU event in the form of rN where N is a hexadecimal value + that represents the raw register encoding with the layout of the + event control registers as described by entries in + /sys/bus/event_sources/devices/cpu/format/*. - a symbolic or raw PMU event followed by an optional colon and a list of event modifiers, e.g., cpu-cycles:p. See the @@ -493,6 +495,10 @@ This option can be enabled in perf config by setting the variable $ perf config stat.no-csv-summary=true +--cputype:: +Only enable events on applying cpu with this type for hybrid platform +(e.g. 
core or atom)" + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 9898a32b8d9c..cac3dfbee7d8 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -38,9 +38,10 @@ Default is to monitor all CPUS. -e <event>:: --event=<event>:: Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + (use 'perf list' to list all events) or a raw PMU event in the form + of rN where N is a hexadecimal value that represents the raw register + encoding with the layout of the event control registers as described + by entries in /sys/bus/event_sources/devices/cpu/format/*. -E <entries>:: --entries=<entries>:: diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 3df74cf5651a..96ad944ca6a8 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) +HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS)) include $(srctree)/tools/scripts/Makefile.arch @@ -143,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif -OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++ +OPENCSDLIBS := -lopencsd_c_api +ifeq ($(findstring -static,${LDFLAGS}),-static) + OPENCSDLIBS += -lopencsd -lstdc++ +endif ifdef CSLIBS LIBOPENCSD_LDFLAGS := -L$(CSLIBS) endif @@ -211,6 +215,7 @@ endif ifneq ($(WERROR),0) CORE_CFLAGS += -Werror CXXFLAGS += -Werror + HOSTCFLAGS += -Werror endif ifndef DEBUG @@ -290,6 +295,9 @@ CXXFLAGS += -ggdb3 CXXFLAGS += -funwind-tables CXXFLAGS += -Wno-strict-aliasing +HOSTCFLAGS += -Wall +HOSTCFLAGS += -Wextra + # 
Enforce a non-executable stack, as we may regress (again) in the future by # adding assembler files missing the .GNU-stack linker note. LDFLAGS += -Wl,-z,noexecstack diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 80522bcfafe0..ac861e42c8f7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -226,7 +226,7 @@ else endif export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK -export HOSTCC HOSTLD HOSTAR +export HOSTCC HOSTLD HOSTAR HOSTCFLAGS include $(srctree)/tools/build/Makefile.include @@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h -SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h +SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): $(Q)$(MKDIR) -p $@ diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h index 4085419283d0..99a06550e25d 100644 --- a/tools/perf/arch/arm/include/perf_regs.h +++ b/tools/perf/arch/arm/include/perf_regs.h @@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM_PC #define PERF_REG_SP PERF_REG_ARM_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_ARM_R0: - return "r0"; - case PERF_REG_ARM_R1: - return "r1"; - case PERF_REG_ARM_R2: - return "r2"; - case PERF_REG_ARM_R3: - return "r3"; - case PERF_REG_ARM_R4: - return "r4"; - case PERF_REG_ARM_R5: - return "r5"; - case PERF_REG_ARM_R6: - return "r6"; - case PERF_REG_ARM_R7: - return "r7"; - case PERF_REG_ARM_R8: - return "r8"; - case PERF_REG_ARM_R9: - return "r9"; - case PERF_REG_ARM_R10: - return "r10"; - case PERF_REG_ARM_FP: - return "fp"; - case PERF_REG_ARM_IP: - return "ip"; - case PERF_REG_ARM_SP: - return "sp"; - case PERF_REG_ARM_LR: - return "lr"; - case 
PERF_REG_ARM_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 293a23bf8be3..2e8b2c4365a0 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr, struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); /* Set option of each CPU we have */ - for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(event_cpus, i) || - !cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(event_cpus, cpu) || + !perf_cpu_map__has(online_cpus, cpu)) continue; if (option & BIT(ETM_OPT_CTXTID)) { @@ -407,25 +409,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, } - /* Validate auxtrace_mmap_pages provided by user */ - if (opts->auxtrace_mmap_pages) { - unsigned int max_page = (KiB(128) / page_size); - size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; - - if (!privileged && - opts->auxtrace_mmap_pages > max_page) { - opts->auxtrace_mmap_pages = max_page; - pr_err("auxtrace too big, truncating to %d\n", - max_page); - } - - if (!is_power_of_2(sz)) { - pr_err("Invalid mmap size for %s: must be a power of 2\n", - CORESIGHT_ETM_PMU_NAME); - return -EINVAL; - } - } - if (opts->auxtrace_snapshot_mode) pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); @@ -541,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, /* cpu map is not empty, we have specific CPUs to work with */ if (!perf_cpu_map__empty(event_cpus)) { - for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(event_cpus, i) || - !cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(event_cpus, cpu) || + 
!perf_cpu_map__has(online_cpus, cpu)) continue; if (cs_etm_is_ete(itr, i)) @@ -555,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, } } else { /* get configuration for all CPUs in the system */ - for (i = 0; i < cpu__max_cpu(); i++) { - if (!cpu_map__has(online_cpus, i)) + for (i = 0; i < cpu__max_cpu().cpu; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (!perf_cpu_map__has(online_cpus, cpu)) continue; if (cs_etm_is_ete(itr, i)) @@ -741,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, } else { /* Make sure all specified CPUs are online */ for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) { - if (cpu_map__has(event_cpus, i) && - !cpu_map__has(online_cpus, i)) + struct perf_cpu cpu = { .cpu = i, }; + + if (perf_cpu_map__has(event_cpus, cpu) && + !perf_cpu_map__has(online_cpus, cpu)) return -EINVAL; } @@ -762,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, offset = CS_ETM_SNAPSHOT + 1; - for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++) - if (cpu_map__has(cpu_map, i)) + for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) { + struct perf_cpu cpu = { .cpu = i, }; + + if (perf_cpu_map__has(cpu_map, cpu)) cs_etm_get_metadata(i, &offset, itr, info); + } perf_cpu_map__put(online_cpus); diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index fa3e07459f76..35a3cc775b39 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -4,7 +4,9 @@ #include <stdlib.h> #include <linux/types.h> +#define perf_event_arm_regs perf_event_arm64_regs #include <asm/perf_regs.h> +#undef perf_event_arm_regs void perf_regs_load(u64 *regs); @@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_ARM64_X0: - return "x0"; - case PERF_REG_ARM64_X1: - 
return "x1"; - case PERF_REG_ARM64_X2: - return "x2"; - case PERF_REG_ARM64_X3: - return "x3"; - case PERF_REG_ARM64_X4: - return "x4"; - case PERF_REG_ARM64_X5: - return "x5"; - case PERF_REG_ARM64_X6: - return "x6"; - case PERF_REG_ARM64_X7: - return "x7"; - case PERF_REG_ARM64_X8: - return "x8"; - case PERF_REG_ARM64_X9: - return "x9"; - case PERF_REG_ARM64_X10: - return "x10"; - case PERF_REG_ARM64_X11: - return "x11"; - case PERF_REG_ARM64_X12: - return "x12"; - case PERF_REG_ARM64_X13: - return "x13"; - case PERF_REG_ARM64_X14: - return "x14"; - case PERF_REG_ARM64_X15: - return "x15"; - case PERF_REG_ARM64_X16: - return "x16"; - case PERF_REG_ARM64_X17: - return "x17"; - case PERF_REG_ARM64_X18: - return "x18"; - case PERF_REG_ARM64_X19: - return "x19"; - case PERF_REG_ARM64_X20: - return "x20"; - case PERF_REG_ARM64_X21: - return "x21"; - case PERF_REG_ARM64_X22: - return "x22"; - case PERF_REG_ARM64_X23: - return "x23"; - case PERF_REG_ARM64_X24: - return "x24"; - case PERF_REG_ARM64_X25: - return "x25"; - case PERF_REG_ARM64_X26: - return "x26"; - case PERF_REG_ARM64_X27: - return "x27"; - case PERF_REG_ARM64_X28: - return "x28"; - case PERF_REG_ARM64_X29: - return "x29"; - case PERF_REG_ARM64_SP: - return "sp"; - case PERF_REG_ARM64_LR: - return "lr"; - case PERF_REG_ARM64_PC: - return "pc"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 7e7714290a87..d2ce31e28cd7 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -5,6 +5,8 @@ #include <string.h> #include "debug.h" #include "symbol.h" +#include "callchain.h" +#include "record.h" /* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. 
Modules start at a low memory @@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c) p->end = c->start; pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end); } + +void arch__add_leaf_frame_record_opts(struct record_opts *opts) +{ + opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask; +} diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index d3a18f9c85f6..79124bba713e 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void) * The cpumap should cover all CPUs. Otherwise, some CPUs may * not support some events or have different event IDs. */ - if (pmu->cpus->nr != cpu__max_cpu()) + if (pmu->cpus->nr != cpu__max_cpu().cpu) return NULL; return perf_pmu__find_map(pmu); diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h index 25ac3bdcb9d1..1afcc0e916c2 100644 --- a/tools/perf/arch/csky/include/perf_regs.h +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -15,86 +15,4 @@ #define PERF_REG_IP PERF_REG_CSKY_PC #define PERF_REG_SP PERF_REG_CSKY_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_CSKY_A0: - return "a0"; - case PERF_REG_CSKY_A1: - return "a1"; - case PERF_REG_CSKY_A2: - return "a2"; - case PERF_REG_CSKY_A3: - return "a3"; - case PERF_REG_CSKY_REGS0: - return "regs0"; - case PERF_REG_CSKY_REGS1: - return "regs1"; - case PERF_REG_CSKY_REGS2: - return "regs2"; - case PERF_REG_CSKY_REGS3: - return "regs3"; - case PERF_REG_CSKY_REGS4: - return "regs4"; - case PERF_REG_CSKY_REGS5: - return "regs5"; - case PERF_REG_CSKY_REGS6: - return "regs6"; - case PERF_REG_CSKY_REGS7: - return "regs7"; - case PERF_REG_CSKY_REGS8: - return "regs8"; - case PERF_REG_CSKY_REGS9: - return "regs9"; - case PERF_REG_CSKY_SP: - return "sp"; - case PERF_REG_CSKY_LR: - return "lr"; - case PERF_REG_CSKY_PC: - return 
"pc"; -#if defined(__CSKYABIV2__) - case PERF_REG_CSKY_EXREGS0: - return "exregs0"; - case PERF_REG_CSKY_EXREGS1: - return "exregs1"; - case PERF_REG_CSKY_EXREGS2: - return "exregs2"; - case PERF_REG_CSKY_EXREGS3: - return "exregs3"; - case PERF_REG_CSKY_EXREGS4: - return "exregs4"; - case PERF_REG_CSKY_EXREGS5: - return "exregs5"; - case PERF_REG_CSKY_EXREGS6: - return "exregs6"; - case PERF_REG_CSKY_EXREGS7: - return "exregs7"; - case PERF_REG_CSKY_EXREGS8: - return "exregs8"; - case PERF_REG_CSKY_EXREGS9: - return "exregs9"; - case PERF_REG_CSKY_EXREGS10: - return "exregs10"; - case PERF_REG_CSKY_EXREGS11: - return "exregs11"; - case PERF_REG_CSKY_EXREGS12: - return "exregs12"; - case PERF_REG_CSKY_EXREGS13: - return "exregs13"; - case PERF_REG_CSKY_EXREGS14: - return "exregs14"; - case PERF_REG_CSKY_TLS: - return "tls"; - case PERF_REG_CSKY_HI: - return "hi"; - case PERF_REG_CSKY_LO: - return "lo"; -#endif - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h index ee73b36a14d1..b8cd8bbb37ba 100644 --- a/tools/perf/arch/mips/include/perf_regs.h +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -12,73 +12,4 @@ #define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_MIPS_PC: - return "PC"; - case PERF_REG_MIPS_R1: - return "$1"; - case PERF_REG_MIPS_R2: - return "$2"; - case PERF_REG_MIPS_R3: - return "$3"; - case PERF_REG_MIPS_R4: - return "$4"; - case PERF_REG_MIPS_R5: - return "$5"; - case PERF_REG_MIPS_R6: - return "$6"; - case PERF_REG_MIPS_R7: - return "$7"; - case PERF_REG_MIPS_R8: - return "$8"; - case PERF_REG_MIPS_R9: - return "$9"; - case PERF_REG_MIPS_R10: - return "$10"; - case PERF_REG_MIPS_R11: - return "$11"; - case PERF_REG_MIPS_R12: - return "$12"; - case PERF_REG_MIPS_R13: - return "$13"; - case PERF_REG_MIPS_R14: - return "$14"; - case 
PERF_REG_MIPS_R15: - return "$15"; - case PERF_REG_MIPS_R16: - return "$16"; - case PERF_REG_MIPS_R17: - return "$17"; - case PERF_REG_MIPS_R18: - return "$18"; - case PERF_REG_MIPS_R19: - return "$19"; - case PERF_REG_MIPS_R20: - return "$20"; - case PERF_REG_MIPS_R21: - return "$21"; - case PERF_REG_MIPS_R22: - return "$22"; - case PERF_REG_MIPS_R23: - return "$23"; - case PERF_REG_MIPS_R24: - return "$24"; - case PERF_REG_MIPS_R25: - return "$25"; - case PERF_REG_MIPS_R28: - return "$28"; - case PERF_REG_MIPS_R29: - return "$29"; - case PERF_REG_MIPS_R30: - return "$30"; - case PERF_REG_MIPS_R31: - return "$31"; - default: - break; - } - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 93339d17acc4..9bb17c3f370b 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_POWERPC_NIP #define PERF_REG_SP PERF_REG_POWERPC_R1 -static const char *reg_names[] = { - [PERF_REG_POWERPC_R0] = "r0", - [PERF_REG_POWERPC_R1] = "r1", - [PERF_REG_POWERPC_R2] = "r2", - [PERF_REG_POWERPC_R3] = "r3", - [PERF_REG_POWERPC_R4] = "r4", - [PERF_REG_POWERPC_R5] = "r5", - [PERF_REG_POWERPC_R6] = "r6", - [PERF_REG_POWERPC_R7] = "r7", - [PERF_REG_POWERPC_R8] = "r8", - [PERF_REG_POWERPC_R9] = "r9", - [PERF_REG_POWERPC_R10] = "r10", - [PERF_REG_POWERPC_R11] = "r11", - [PERF_REG_POWERPC_R12] = "r12", - [PERF_REG_POWERPC_R13] = "r13", - [PERF_REG_POWERPC_R14] = "r14", - [PERF_REG_POWERPC_R15] = "r15", - [PERF_REG_POWERPC_R16] = "r16", - [PERF_REG_POWERPC_R17] = "r17", - [PERF_REG_POWERPC_R18] = "r18", - [PERF_REG_POWERPC_R19] = "r19", - [PERF_REG_POWERPC_R20] = "r20", - [PERF_REG_POWERPC_R21] = "r21", - [PERF_REG_POWERPC_R22] = "r22", - [PERF_REG_POWERPC_R23] = "r23", - [PERF_REG_POWERPC_R24] = "r24", - [PERF_REG_POWERPC_R25] = "r25", - [PERF_REG_POWERPC_R26] = 
"r26", - [PERF_REG_POWERPC_R27] = "r27", - [PERF_REG_POWERPC_R28] = "r28", - [PERF_REG_POWERPC_R29] = "r29", - [PERF_REG_POWERPC_R30] = "r30", - [PERF_REG_POWERPC_R31] = "r31", - [PERF_REG_POWERPC_NIP] = "nip", - [PERF_REG_POWERPC_MSR] = "msr", - [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", - [PERF_REG_POWERPC_CTR] = "ctr", - [PERF_REG_POWERPC_LINK] = "link", - [PERF_REG_POWERPC_XER] = "xer", - [PERF_REG_POWERPC_CCR] = "ccr", - [PERF_REG_POWERPC_SOFTE] = "softe", - [PERF_REG_POWERPC_TRAP] = "trap", - [PERF_REG_POWERPC_DAR] = "dar", - [PERF_REG_POWERPC_DSISR] = "dsisr", - [PERF_REG_POWERPC_SIER] = "sier", - [PERF_REG_POWERPC_MMCRA] = "mmcra", - [PERF_REG_POWERPC_MMCR0] = "mmcr0", - [PERF_REG_POWERPC_MMCR1] = "mmcr1", - [PERF_REG_POWERPC_MMCR2] = "mmcr2", - [PERF_REG_POWERPC_MMCR3] = "mmcr3", - [PERF_REG_POWERPC_SIER2] = "sier2", - [PERF_REG_POWERPC_SIER3] = "sier3", - [PERF_REG_POWERPC_PMC1] = "pmc1", - [PERF_REG_POWERPC_PMC2] = "pmc2", - [PERF_REG_POWERPC_PMC3] = "pmc3", - [PERF_REG_POWERPC_PMC4] = "pmc4", - [PERF_REG_POWERPC_PMC5] = "pmc5", - [PERF_REG_POWERPC_PMC6] = "pmc6", - [PERF_REG_POWERPC_SDAR] = "sdar", - [PERF_REG_POWERPC_SIAR] = "siar", -}; - -static inline const char *__perf_reg_name(int id) -{ - return reg_names[id]; -} #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index 3bf441257466..cf430a4c55b9 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header) { if (!strcmp(se_header, "Local INSTR Latency")) return "Finish Cyc"; - else if (!strcmp(se_header, "Pipeline Stage Cycle")) + else if (!strcmp(se_header, "INSTR Latency")) + return "Global Finish_cyc"; + else if (!strcmp(se_header, "Local Pipeline Stage Cycle")) return "Dispatch Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Global Dispatch_cyc"; return se_header; } @@ -49,5 +53,7 @@ int 
arch_support_sort_key(const char *sort_key) { if (!strcmp(sort_key, "p_stage_cyc")) return 1; + if (!strcmp(sort_key, "local_p_stage_cyc")) + return 1; return 0; } diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h index 6b02a767c918..6944bf0de53e 100644 --- a/tools/perf/arch/riscv/include/perf_regs.h +++ b/tools/perf/arch/riscv/include/perf_regs.h @@ -19,78 +19,4 @@ #define PERF_REG_IP PERF_REG_RISCV_PC #define PERF_REG_SP PERF_REG_RISCV_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_RISCV_PC: - return "pc"; - case PERF_REG_RISCV_RA: - return "ra"; - case PERF_REG_RISCV_SP: - return "sp"; - case PERF_REG_RISCV_GP: - return "gp"; - case PERF_REG_RISCV_TP: - return "tp"; - case PERF_REG_RISCV_T0: - return "t0"; - case PERF_REG_RISCV_T1: - return "t1"; - case PERF_REG_RISCV_T2: - return "t2"; - case PERF_REG_RISCV_S0: - return "s0"; - case PERF_REG_RISCV_S1: - return "s1"; - case PERF_REG_RISCV_A0: - return "a0"; - case PERF_REG_RISCV_A1: - return "a1"; - case PERF_REG_RISCV_A2: - return "a2"; - case PERF_REG_RISCV_A3: - return "a3"; - case PERF_REG_RISCV_A4: - return "a4"; - case PERF_REG_RISCV_A5: - return "a5"; - case PERF_REG_RISCV_A6: - return "a6"; - case PERF_REG_RISCV_A7: - return "a7"; - case PERF_REG_RISCV_S2: - return "s2"; - case PERF_REG_RISCV_S3: - return "s3"; - case PERF_REG_RISCV_S4: - return "s4"; - case PERF_REG_RISCV_S5: - return "s5"; - case PERF_REG_RISCV_S6: - return "s6"; - case PERF_REG_RISCV_S7: - return "s7"; - case PERF_REG_RISCV_S8: - return "s8"; - case PERF_REG_RISCV_S9: - return "s9"; - case PERF_REG_RISCV_S10: - return "s10"; - case PERF_REG_RISCV_S11: - return "s11"; - case PERF_REG_RISCV_T3: - return "t3"; - case PERF_REG_RISCV_T4: - return "t4"; - case PERF_REG_RISCV_T5: - return "t5"; - case PERF_REG_RISCV_T6: - return "t6"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git 
a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index ce3031526623..52fcc0891da6 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_S390_PC #define PERF_REG_SP PERF_REG_S390_R15 -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_S390_R0: - return "R0"; - case PERF_REG_S390_R1: - return "R1"; - case PERF_REG_S390_R2: - return "R2"; - case PERF_REG_S390_R3: - return "R3"; - case PERF_REG_S390_R4: - return "R4"; - case PERF_REG_S390_R5: - return "R5"; - case PERF_REG_S390_R6: - return "R6"; - case PERF_REG_S390_R7: - return "R7"; - case PERF_REG_S390_R8: - return "R8"; - case PERF_REG_S390_R9: - return "R9"; - case PERF_REG_S390_R10: - return "R10"; - case PERF_REG_S390_R11: - return "R11"; - case PERF_REG_S390_R12: - return "R12"; - case PERF_REG_S390_R13: - return "R13"; - case PERF_REG_S390_R14: - return "R14"; - case PERF_REG_S390_R15: - return "R15"; - case PERF_REG_S390_FP0: - return "FP0"; - case PERF_REG_S390_FP1: - return "FP1"; - case PERF_REG_S390_FP2: - return "FP2"; - case PERF_REG_S390_FP3: - return "FP3"; - case PERF_REG_S390_FP4: - return "FP4"; - case PERF_REG_S390_FP5: - return "FP5"; - case PERF_REG_S390_FP6: - return "FP6"; - case PERF_REG_S390_FP7: - return "FP7"; - case PERF_REG_S390_FP8: - return "FP8"; - case PERF_REG_S390_FP9: - return "FP9"; - case PERF_REG_S390_FP10: - return "FP10"; - case PERF_REG_S390_FP11: - return "FP11"; - case PERF_REG_S390_FP12: - return "FP12"; - case PERF_REG_S390_FP13: - return "FP13"; - case PERF_REG_S390_FP14: - return "FP14"; - case PERF_REG_S390_FP15: - return "FP15"; - case PERF_REG_S390_MASK: - return "MASK"; - case PERF_REG_S390_PC: - return "PC"; - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/include/perf_regs.h 
b/tools/perf/arch/x86/include/perf_regs.h index cddc4cdc0d9b..16e23b722042 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP -static inline const char *__perf_reg_name(int id) -{ - switch (id) { - case PERF_REG_X86_AX: - return "AX"; - case PERF_REG_X86_BX: - return "BX"; - case PERF_REG_X86_CX: - return "CX"; - case PERF_REG_X86_DX: - return "DX"; - case PERF_REG_X86_SI: - return "SI"; - case PERF_REG_X86_DI: - return "DI"; - case PERF_REG_X86_BP: - return "BP"; - case PERF_REG_X86_SP: - return "SP"; - case PERF_REG_X86_IP: - return "IP"; - case PERF_REG_X86_FLAGS: - return "FLAGS"; - case PERF_REG_X86_CS: - return "CS"; - case PERF_REG_X86_SS: - return "SS"; - case PERF_REG_X86_DS: - return "DS"; - case PERF_REG_X86_ES: - return "ES"; - case PERF_REG_X86_FS: - return "FS"; - case PERF_REG_X86_GS: - return "GS"; -#ifdef HAVE_ARCH_X86_64_SUPPORT - case PERF_REG_X86_R8: - return "R8"; - case PERF_REG_X86_R9: - return "R9"; - case PERF_REG_X86_R10: - return "R10"; - case PERF_REG_X86_R11: - return "R11"; - case PERF_REG_X86_R12: - return "R12"; - case PERF_REG_X86_R13: - return "R13"; - case PERF_REG_X86_R14: - return "R14"; - case PERF_REG_X86_R15: - return "R15"; -#endif /* HAVE_ARCH_X86_64_SUPPORT */ - -#define XMM(x) \ - case PERF_REG_X86_XMM ## x: \ - case PERF_REG_X86_XMM ## x + 1: \ - return "XMM" #x; - XMM(0) - XMM(1) - XMM(2) - XMM(3) - XMM(4) - XMM(5) - XMM(6) - XMM(7) - XMM(8) - XMM(9) - XMM(10) - XMM(11) - XMM(12) - XMM(13) - XMM(14) - XMM(15) -#undef XMM - default: - return NULL; - } - - return NULL; -} - #endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index 0b0951030a2f..f924246eff78 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -17,3 +17,20 @@ int 
arch_evlist__add_default_attrs(struct evlist *evlist) else return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL); } + +struct evsel *arch_evlist__leader(struct list_head *list) +{ + struct evsel *evsel, *first; + + first = list_first_entry(list, struct evsel, core.node); + + if (!pmu_have_event("cpu", "slots")) + return first; + + __evlist__for_each_entry(list, evsel) { + if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") && + evsel->name && strstr(evsel->name, "slots")) + return evsel; + } + return first; +} diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index ddaca75c3bc0..1a17ec83d3c4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 79d13dbc0a47..0d1dd8879197 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) if (!noaffinity) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fcdea3e44937..9627b6ab8670 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv) goto errmem; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); ret = 
pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 137890f78e17..a512a320df74 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr, worker[i].futex = &global_futex; CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index f7a5ffebb940..aca47ce8b1e7 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -131,7 +131,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 0983f40b4b40..888ee6037945 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 
2226a475e782..aa82db51c0ab 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -105,7 +105,7 @@ static void block_threads(pthread_t *w, /* create and block all threads */ for (i = 0; i < params.nthreads; i++) { CPU_ZERO(&cpuset); - CPU_SET(cpu->map[i % cpu->nr], &cpuset); + CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset); if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index d0895162c2ba..d291f3a8af5f 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -226,7 +226,6 @@ static void run_collection(struct collection *coll) if (!bench->fn) break; printf("# Running %s/%s benchmark...\n", coll->name, bench->name); - fflush(stdout); argv[1] = bench->name; run_bench(coll->name, bench->name, bench->fn, 1, argv); @@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv) struct collection *coll; int ret = 0; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + if (argc < 2) { /* No collection specified. 
*/ print_usage(); @@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); - fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 0db3cfc04c47..cd381693658b 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -351,10 +351,14 @@ static int build_id_cache__show_all(void) static int perf_buildid_cache_config(const char *var, const char *value, void *cb) { - const char **debuginfod = cb; + struct perf_debuginfod *di = cb; - if (!strcmp(var, "buildid-cache.debuginfod")) - *debuginfod = strdup(value); + if (!strcmp(var, "buildid-cache.debuginfod")) { + di->urls = strdup(value); + if (!di->urls) + return -ENOMEM; + di->set = true; + } return 0; } @@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv) *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, - *kcore_filename = NULL, - *debuginfod = NULL; + *kcore_filename = NULL; + struct perf_debuginfod debuginfod = { }; char sbuf[STRERR_BUFSIZE]; struct perf_data data = { @@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv) OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_STRING('u', "update", &update_name_list_str, "file list", "file(s) to update"), - OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url", - "set debuginfod url"), + OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls, + &debuginfod.set, "debuginfod urls", + "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", + "system"), OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"), OPT_END() @@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv) if (argc || !(list_files || 
opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); - if (debuginfod) { - pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod); - setenv("DEBUGINFOD_URLS", debuginfod, 1); - } + perf_debuginfod_setup(&debuginfod); /* -l is exclusive. It can not be used with other options. */ if (list_files && opts_flag) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b5c67ef73862..77dd4afacca4 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session) { struct numa_node *n; unsigned long **nodes; - int node, cpu; + int node, idx; + struct perf_cpu cpu; int *cpu2node; if (c2c.node_info > 2) @@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session) if (!cpu2node) return -ENOMEM; - for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) - cpu2node[cpu] = -1; + for (idx = 0; idx < c2c.cpus_cnt; idx++) + cpu2node[idx] = -1; c2c.cpu2node = cpu2node; @@ -2057,13 +2058,13 @@ static int setup_nodes(struct perf_session *session) if (perf_cpu_map__empty(map)) continue; - for (cpu = 0; cpu < map->nr; cpu++) { - set_bit(map->map[cpu], set); + perf_cpu_map__for_each_cpu(cpu, idx, map) { + set_bit(cpu.cpu, set); - if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) + if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug")) return -EINVAL; - cpu2node[map->map[cpu]] = node; + cpu2node[cpu.cpu] = node; } } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 87cb11a7a3ee..71452599f87d 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -13,7 +13,9 @@ #include <signal.h> #include <stdlib.h> #include <fcntl.h> +#include <math.h> #include <poll.h> +#include <ctype.h> #include <linux/capability.h> #include <linux/string.h> @@ -28,36 +30,12 @@ #include "strfilter.h" #include "util/cap.h" #include "util/config.h" +#include "util/ftrace.h" #include "util/units.h" #include "util/parse-sublevel-options.h" 
#define DEFAULT_TRACER "function_graph" -struct perf_ftrace { - struct evlist *evlist; - struct target target; - const char *tracer; - struct list_head filters; - struct list_head notrace; - struct list_head graph_funcs; - struct list_head nograph_funcs; - int graph_depth; - unsigned long percpu_buffer_size; - bool inherit; - int func_stack_trace; - int func_irq_info; - int graph_nosleep_time; - int graph_noirqs; - int graph_verbose; - int graph_thresh; - unsigned int initial_delay; -}; - -struct filter_entry { - struct list_head list; - char name[]; -}; - static volatile int workload_exec_errno; static bool done; @@ -303,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap) int ret; int last_cpu; - last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1); + last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1).cpu; mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ @@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace) return 0; } -static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) +static void select_tracer(struct perf_ftrace *ftrace) +{ + bool graph = !list_empty(&ftrace->graph_funcs) || + !list_empty(&ftrace->nograph_funcs); + bool func = !list_empty(&ftrace->filters) || + !list_empty(&ftrace->notrace); + + /* The function_graph has priority over function tracer. */ + if (graph) + ftrace->tracer = "function_graph"; + else if (func) + ftrace->tracer = "function"; + /* Otherwise, the default tracer is used. 
*/ + + pr_debug("%s tracer is used\n", ftrace->tracer); +} + +static int __cmd_ftrace(struct perf_ftrace *ftrace) { char *trace_file; int trace_fd; @@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) return -1; } - signal(SIGINT, sig_handler); - signal(SIGUSR1, sig_handler); - signal(SIGCHLD, sig_handler); - signal(SIGPIPE, sig_handler); + select_tracer(ftrace); if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); @@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) if (write_tracing_file("trace", "0") < 0) goto out; - if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false, - ftrace__workload_exec_failed_signal) < 0) { - goto out; - } - if (set_tracing_options(ftrace) < 0) goto out_reset; @@ -693,6 +680,270 @@ out: return (done && !workload_exec_errno) ? 0 : -1; } +static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf) +{ + char *p, *q; + char *unit; + double num; + int i; + + /* ensure NUL termination */ + buf[len] = '\0'; + + /* handle data line by line */ + for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) { + *q = '\0'; + /* move it to the line buffer */ + strcat(linebuf, p); + + /* + * parse trace output to get function duration like in + * + * # tracer: function_graph + * # + * # CPU DURATION FUNCTION CALLS + * # | | | | | | | + * 1) + 10.291 us | do_filp_open(); + * 1) 4.889 us | do_filp_open(); + * 1) 6.086 us | do_filp_open(); + * + */ + if (linebuf[0] == '#') + goto next; + + /* ignore CPU */ + p = strchr(linebuf, ')'); + if (p == NULL) + p = linebuf; + + while (*p && !isdigit(*p) && (*p != '|')) + p++; + + /* no duration */ + if (*p == '\0' || *p == '|') + goto next; + + num = strtod(p, &unit); + if (!unit || strncmp(unit, " us", 3)) + goto next; + + i = log2(num); + if (i < 0) + i = 0; + if (i >= NUM_BUCKET) + i = NUM_BUCKET - 1; + + buckets[i]++; + +next: + /* empty the line 
buffer for the next output */ + linebuf[0] = '\0'; + } + + /* preserve any remaining output (before newline) */ + strcat(linebuf, p); +} + +static void display_histogram(int buckets[]) +{ + int i; + int total = 0; + int bar_total = 46; /* to fit in 80 column */ + char bar[] = "###############################################"; + int bar_len; + + for (i = 0; i < NUM_BUCKET; i++) + total += buckets[i]; + + if (total == 0) { + printf("No data found\n"); + return; + } + + printf("# %14s | %10s | %-*s |\n", + " DURATION ", "COUNT", bar_total, "GRAPH"); + + bar_len = buckets[0] * bar_total / total; + printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); + + for (i = 1; i < NUM_BUCKET - 1; i++) { + int start = (1 << (i - 1)); + int stop = 1 << i; + const char *unit = "us"; + + if (start >= 1024) { + start >>= 10; + stop >>= 10; + unit = "ms"; + } + bar_len = buckets[i] * bar_total / total; + printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + start, stop, unit, buckets[i], bar_len, bar, + bar_total - bar_len, ""); + } + + bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; + printf(" %4d - %-4s %s | %10d | %.*s%*s |\n", + 1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar, + bar_total - bar_len, ""); + +} + +static int prepare_func_latency(struct perf_ftrace *ftrace) +{ + char *trace_file; + int fd; + + if (ftrace->target.use_bpf) + return perf_ftrace__latency_prepare_bpf(ftrace); + + if (reset_tracing_files(ftrace) < 0) { + pr_err("failed to reset ftrace\n"); + return -1; + } + + /* reset ftrace buffer */ + if (write_tracing_file("trace", "0") < 0) + return -1; + + if (set_tracing_options(ftrace) < 0) + return -1; + + /* force to use the function_graph tracer to track duration */ + if (write_tracing_file("current_tracer", "function_graph") < 0) { + pr_err("failed to set current_tracer to function_graph\n"); + return -1; + } + + trace_file = get_tracing_file("trace_pipe"); + if (!trace_file) { + pr_err("failed to 
open trace_pipe\n"); + return -1; + } + + fd = open(trace_file, O_RDONLY); + if (fd < 0) + pr_err("failed to open trace_pipe\n"); + + put_tracing_file(trace_file); + return fd; +} + +static int start_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_start_bpf(ftrace); + + if (write_tracing_file("tracing_on", "1") < 0) { + pr_err("can't enable tracing\n"); + return -1; + } + + return 0; +} + +static int stop_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_stop_bpf(ftrace); + + write_tracing_file("tracing_on", "0"); + return 0; +} + +static int read_func_latency(struct perf_ftrace *ftrace, int buckets[]) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_read_bpf(ftrace, buckets); + + return 0; +} + +static int cleanup_func_latency(struct perf_ftrace *ftrace) +{ + if (ftrace->target.use_bpf) + return perf_ftrace__latency_cleanup_bpf(ftrace); + + reset_tracing_files(ftrace); + return 0; +} + +static int __cmd_latency(struct perf_ftrace *ftrace) +{ + int trace_fd; + char buf[4096]; + char line[256]; + struct pollfd pollfd = { + .events = POLLIN, + }; + int buckets[NUM_BUCKET] = { }; + + if (!(perf_cap__capable(CAP_PERFMON) || + perf_cap__capable(CAP_SYS_ADMIN))) { + pr_err("ftrace only works for %s!\n", +#ifdef HAVE_LIBCAP_SUPPORT + "users with the CAP_PERFMON or CAP_SYS_ADMIN capability" +#else + "root" +#endif + ); + return -1; + } + + trace_fd = prepare_func_latency(ftrace); + if (trace_fd < 0) + goto out; + + fcntl(trace_fd, F_SETFL, O_NONBLOCK); + pollfd.fd = trace_fd; + + if (start_func_latency(ftrace) < 0) + goto out; + + evlist__start_workload(ftrace->evlist); + + line[0] = '\0'; + while (!done) { + if (poll(&pollfd, 1, -1) < 0) + break; + + if (pollfd.revents & POLLIN) { + int n = read(trace_fd, buf, sizeof(buf) - 1); + if (n < 0) + break; + + make_histogram(buckets, buf, n, line); + } + } + + stop_func_latency(ftrace); + + if 
(workload_exec_errno) { + const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); + pr_err("workload failed: %s\n", emsg); + goto out; + } + + /* read remaining buffer contents */ + while (!ftrace->target.use_bpf) { + int n = read(trace_fd, buf, sizeof(buf) - 1); + if (n <= 0) + break; + make_histogram(buckets, buf, n, line); + } + + read_func_latency(ftrace, buckets); + + display_histogram(buckets); + +out: + close(trace_fd); + cleanup_func_latency(ftrace); + + return (done && !workload_exec_errno) ? 0 : -1; +} + static int perf_ftrace_config(const char *var, const char *value, void *cb) { struct perf_ftrace *ftrace = cb; @@ -855,22 +1106,11 @@ static int parse_graph_tracer_opts(const struct option *opt, return 0; } -static void select_tracer(struct perf_ftrace *ftrace) -{ - bool graph = !list_empty(&ftrace->graph_funcs) || - !list_empty(&ftrace->nograph_funcs); - bool func = !list_empty(&ftrace->filters) || - !list_empty(&ftrace->notrace); - - /* The function_graph has priority over function tracer. */ - if (graph) - ftrace->tracer = "function_graph"; - else if (func) - ftrace->tracer = "function"; - /* Otherwise, the default tracer is used. 
*/ - - pr_debug("%s tracer is used\n", ftrace->tracer); -} +enum perf_ftrace_subcommand { + PERF_FTRACE_NONE, + PERF_FTRACE_TRACE, + PERF_FTRACE_LATENCY, +}; int cmd_ftrace(int argc, const char **argv) { @@ -879,17 +1119,7 @@ int cmd_ftrace(int argc, const char **argv) .tracer = DEFAULT_TRACER, .target = { .uid = UINT_MAX, }, }; - const char * const ftrace_usage[] = { - "perf ftrace [<options>] [<command>]", - "perf ftrace [<options>] -- <command> [<options>]", - NULL - }; - const struct option ftrace_options[] = { - OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", - "Tracer to use: function_graph(default) or function"), - OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", - "Show available functions to filter", - opt_list_avail_functions, "*"), + const struct option common_options[] = { OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ @@ -901,6 +1131,14 @@ int cmd_ftrace(int argc, const char **argv) "System-wide collection from all CPUs"), OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu", "List of cpus to monitor"), + OPT_END() + }; + const struct option ftrace_options[] = { + OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", + "Tracer to use: function_graph(default) or function"), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show available functions to filter", + opt_list_avail_functions, "*"), OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", "Trace given functions using function tracer", parse_filter_func), @@ -923,24 +1161,65 @@ int cmd_ftrace(int argc, const char **argv) "Trace children processes"), OPT_UINTEGER('D', "delay", &ftrace.initial_delay, "Number of milliseconds to wait before starting tracing after program start"), - OPT_END() + OPT_PARENT(common_options), + }; + const struct option latency_options[] = { + OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func", + "Show latency of given function", parse_filter_func), 
+#ifdef HAVE_BPF_SKEL + OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf, + "Use BPF to measure function latency"), +#endif + OPT_PARENT(common_options), + }; + const struct option *options = ftrace_options; + + const char * const ftrace_usage[] = { + "perf ftrace [<options>] [<command>]", + "perf ftrace [<options>] -- [<command>] [<options>]", + "perf ftrace {trace|latency} [<options>] [<command>]", + "perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]", + NULL }; + enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE; INIT_LIST_HEAD(&ftrace.filters); INIT_LIST_HEAD(&ftrace.notrace); INIT_LIST_HEAD(&ftrace.graph_funcs); INIT_LIST_HEAD(&ftrace.nograph_funcs); + signal(SIGINT, sig_handler); + signal(SIGUSR1, sig_handler); + signal(SIGCHLD, sig_handler); + signal(SIGPIPE, sig_handler); + ret = perf_config(perf_ftrace_config, &ftrace); if (ret < 0) return -1; - argc = parse_options(argc, argv, ftrace_options, ftrace_usage, - PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc && target__none(&ftrace.target)) - ftrace.target.system_wide = true; + if (argc > 1) { + if (!strcmp(argv[1], "trace")) { + subcmd = PERF_FTRACE_TRACE; + } else if (!strcmp(argv[1], "latency")) { + subcmd = PERF_FTRACE_LATENCY; + options = latency_options; + } + + if (subcmd != PERF_FTRACE_NONE) { + argc--; + argv++; + } + } + /* for backward compatibility */ + if (subcmd == PERF_FTRACE_NONE) + subcmd = PERF_FTRACE_TRACE; - select_tracer(&ftrace); + argc = parse_options(argc, argv, options, ftrace_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc < 0) { + ret = -EINVAL; + goto out_delete_filters; + } ret = target__validate(&ftrace.target); if (ret) { @@ -961,7 +1240,35 @@ int cmd_ftrace(int argc, const char **argv) if (ret < 0) goto out_delete_evlist; - ret = __cmd_ftrace(&ftrace, argc, argv); + if (argc) { + ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target, + argv, false, + ftrace__workload_exec_failed_signal); + if (ret < 0) + goto out_delete_evlist; + } + + switch 
(subcmd) { + case PERF_FTRACE_TRACE: + if (!argc && target__none(&ftrace.target)) + ftrace.target.system_wide = true; + ret = __cmd_ftrace(&ftrace); + break; + case PERF_FTRACE_LATENCY: + if (list_empty(&ftrace.filters)) { + pr_err("Should provide a function to measure\n"); + parse_options_usage(ftrace_usage, options, "T", 1); + ret = -EINVAL; + goto out_delete_evlist; + } + ret = __cmd_latency(&ftrace); + break; + case PERF_FTRACE_NONE: + default: + pr_err("Invalid subcommand\n"); + ret = -EINVAL; + break; + } out_delete_evlist: evlist__delete(ftrace.evlist); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index da03a341c63c..99d7ff9a8eff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp int ret = evsel__process_alloc_event(evsel, sample); if (!ret) { - int node1 = cpu__get_node(sample->cpu), + int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), node2 = evsel__intval(evsel, sample, "node"); if (node1 != node2) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0338b813585a..bb716c953d02 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -111,6 +111,7 @@ struct record { unsigned long long samples; struct mmap_cpu_mask affinity_mask; unsigned long output_max_size; /* = 0: unlimited */ + struct perf_debuginfod debuginfod; }; static volatile int done; @@ -2177,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb) rec->opts.nr_cblocks = nr_cblocks_default; } #endif + if (!strcmp(var, "record.debuginfod")) { + rec->debuginfod.urls = strdup(value); + if (!rec->debuginfod.urls) + return -ENOMEM; + rec->debuginfod.set = true; + } return 0; } @@ -2267,6 +2274,10 @@ out_free: return ret; } +void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused) +{ +} + static int parse_control_option(const struct option 
*opt, const char *str, int unset __maybe_unused) @@ -2663,6 +2674,10 @@ static struct option __record_options[] = { parse_control_option), OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", "Fine-tune event synthesis: default=all", parse_record_synth_option), + OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls, + &record.debuginfod.set, "debuginfod urls", + "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls", + "system"), OPT_END() }; @@ -2716,6 +2731,8 @@ int cmd_record(int argc, const char **argv) if (err) return err; + perf_debuginfod_setup(&record.debuginfod); + /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) rec->opts.target.system_wide = true; @@ -2792,7 +2809,7 @@ int cmd_record(int argc, const char **argv) symbol__init(NULL); if (rec->opts.affinity != PERF_AFFINITY_SYS) { - rec->affinity_mask.nbits = cpu__max_cpu(); + rec->affinity_mask.nbits = cpu__max_cpu().cpu; rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits); if (!rec->affinity_mask.bits) { pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); @@ -2898,6 +2915,10 @@ int cmd_record(int argc, const char **argv) } rec->opts.target.hybrid = perf_pmu__has_hybrid(); + + if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP) + arch__add_leaf_frame_record_opts(&rec->opts); + err = -ENOMEM; if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8ae400429870..1dd92d8c9279 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep) } } - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env)); if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { 
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n" @@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused, * on events sample_type. */ sample_type = evlist__combined_sample_type(*pevlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); return 0; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4527f632ebe4..72d446de9c60 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -167,7 +167,7 @@ struct trace_sched_handler { struct perf_sched_map { DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS); - int *comp_cpus; + struct perf_cpu *comp_cpus; bool comp; struct perf_thread_map *color_pids; const char *color_pids_str; @@ -191,7 +191,7 @@ struct perf_sched { * Track the current task - that way we can know whether there's any * weird events, such as a task being switched away that is not current. */ - int max_cpu; + struct perf_cpu max_cpu; u32 curr_pid[MAX_CPUS]; struct thread *curr_thread[MAX_CPUS]; char next_shortname1; @@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, int new_shortname; u64 timestamp0, timestamp = sample->time; s64 delta; - int i, this_cpu = sample->cpu; + int i; + struct perf_cpu this_cpu = { + .cpu = sample->cpu, + }; int cpus_nr; bool new_cpu = false; const char *color = PERF_COLOR_NORMAL; char stimestamp[32]; - BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); + BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0); - if (this_cpu > sched->max_cpu) + if (this_cpu.cpu > sched->max_cpu.cpu) sched->max_cpu = this_cpu; if (sched->map.comp) { cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS); - if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) { + if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) { sched->map.comp_cpus[cpus_nr++] = this_cpu; new_cpu = true; } } else - cpus_nr = sched->max_cpu; + cpus_nr = sched->max_cpu.cpu; - 
timestamp0 = sched->cpu_last_switched[this_cpu]; - sched->cpu_last_switched[this_cpu] = timestamp; + timestamp0 = sched->cpu_last_switched[this_cpu.cpu]; + sched->cpu_last_switched[this_cpu.cpu] = timestamp; if (timestamp0) delta = timestamp - timestamp0; else @@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, return -1; } - sched->curr_thread[this_cpu] = thread__get(sched_in); + sched->curr_thread[this_cpu.cpu] = thread__get(sched_in); printf(" "); @@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } for (i = 0; i < cpus_nr; i++) { - int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i; - struct thread *curr_thread = sched->curr_thread[cpu]; + struct perf_cpu cpu = { + .cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i, + }; + struct thread *curr_thread = sched->curr_thread[cpu.cpu]; struct thread_runtime *curr_tr; const char *pid_color = color; const char *cpu_color = color; @@ -1617,19 +1622,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, if (curr_thread && thread__has_color(curr_thread)) pid_color = COLOR_PIDS; - if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu)) + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu)) continue; - if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu)) + if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu)) cpu_color = COLOR_CPUS; - if (cpu != this_cpu) + if (cpu.cpu != this_cpu.cpu) color_fprintf(stdout, color, " "); else color_fprintf(stdout, cpu_color, "*"); - if (sched->curr_thread[cpu]) { - curr_tr = thread__get_runtime(sched->curr_thread[cpu]); + if (sched->curr_thread[cpu.cpu]) { + curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]); if (curr_tr == NULL) { thread__put(sched_in); return -1; @@ -1639,7 +1644,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, color_fprintf(stdout, color, " "); } - if 
(sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu)) + if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu)) goto out; timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp)); @@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread) static void timehist_header(struct perf_sched *sched) { - u32 ncpus = sched->max_cpu + 1; + u32 ncpus = sched->max_cpu.cpu + 1; u32 i, j; printf("%15s %6s ", "time", "cpu"); @@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched, struct thread_runtime *tr = thread__priv(thread); const char *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 next_pid = evsel__intval(evsel, sample, "next_pid"); - u32 max_cpus = sched->max_cpu + 1; + u32 max_cpus = sched->max_cpu.cpu + 1; char tstr[64]; char nstr[30]; u64 wait_time; @@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched, timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr)); printf("%15s [%04d] ", tstr, sample->cpu); if (sched->show_cpu_visual) - printf(" %*s ", sched->max_cpu + 1, ""); + printf(" %*s ", sched->max_cpu.cpu + 1, ""); printf(" %-*s ", comm_width, timehist_get_commstr(thread)); @@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched, { struct thread *thread; char tstr[64]; - u32 max_cpus = sched->max_cpu + 1; + u32 max_cpus; u32 ocpu, dcpu; if (sched->summary_only) return; - max_cpus = sched->max_cpu + 1; + max_cpus = sched->max_cpu.cpu + 1; ocpu = evsel__intval(evsel, sample, "orig_cpu"); dcpu = evsel__intval(evsel, sample, "dest_cpu"); @@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" Total scheduling time (msec): "); print_sched_time(hist_time, 2); - printf(" (x %d)\n", sched->max_cpu); + printf(" (x %d)\n", sched->max_cpu.cpu); } typedef int (*sched_handler)(struct perf_tool *tool, @@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct 
perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); int err = 0; - int this_cpu = sample->cpu; + struct perf_cpu this_cpu = { + .cpu = sample->cpu, + }; - if (this_cpu > sched->max_cpu) + if (this_cpu.cpu > sched->max_cpu.cpu) sched->max_cpu = this_cpu; if (evsel->handler != NULL) { @@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; /* pre-allocate struct for per-CPU idle stats */ - sched->max_cpu = session->header.env.nr_cpus_online; - if (sched->max_cpu == 0) - sched->max_cpu = 4; - if (init_idle_threads(sched->max_cpu)) + sched->max_cpu.cpu = session->header.env.nr_cpus_online; + if (sched->max_cpu.cpu == 0) + sched->max_cpu.cpu = 4; + if (init_idle_threads(sched->max_cpu.cpu)) goto out; /* summary_only implies summary option, but don't overwrite summary if set */ @@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched) { struct perf_cpu_map *map; - sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); + sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF); if (sched->map.comp) { - sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int)); + sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int)); if (!sched->map.comp_cpus) return -1; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c82b033e8942..ecd4f99a6c14 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -15,6 +15,7 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" +#include "util/env.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/evsel_fprintf.h" @@ -648,7 +649,7 @@ out: return 0; } -static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, +static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch, FILE *fp) { unsigned i = 0, r; @@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, for_each_set_bit(r, 
(unsigned long *) &mask, sizeof(mask) * 8) { u64 val = regs->regs[i++]; - printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val); + printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val); } return printed; @@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen, } static int perf_sample__fprintf_iregs(struct perf_sample *sample, - struct perf_event_attr *attr, FILE *fp) + struct perf_event_attr *attr, const char *arch, FILE *fp) { return perf_sample__fprintf_regs(&sample->intr_regs, - attr->sample_regs_intr, fp); + attr->sample_regs_intr, arch, fp); } static int perf_sample__fprintf_uregs(struct perf_sample *sample, - struct perf_event_attr *attr, FILE *fp) + struct perf_event_attr *attr, const char *arch, FILE *fp) { return perf_sample__fprintf_regs(&sample->user_regs, - attr->sample_regs_user, fp); + attr->sample_regs_user, arch, fp); } static int perf_sample__fprintf_start(struct perf_script *script, @@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script, struct evsel_script *es = evsel->priv; FILE *fp = es->fp; char str[PAGE_SIZE_NAME_LEN]; + const char *arch = perf_env__arch(machine->env); if (output[type].fields == 0) return; @@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script, } if (PRINT_FIELD(IREGS)) - perf_sample__fprintf_iregs(sample, attr, fp); + perf_sample__fprintf_iregs(sample, attr, arch, fp); if (PRINT_FIELD(UREGS)) - perf_sample__fprintf_uregs(sample, attr, fp); + perf_sample__fprintf_uregs(sample, attr, arch, fp); if (PRINT_FIELD(BRSTACK)) perf_sample__fprintf_brstack(sample, thread, attr, fp); @@ -2113,8 +2115,8 @@ static struct scripting_ops *scripting_ops; static void __process_stat(struct evsel *counter, u64 tstamp) { int nthreads = perf_thread_map__nr(counter->core.threads); - int ncpus = evsel__nr_cpus(counter); - int cpu, thread; + int idx, thread; + struct perf_cpu cpu; static int header_printed; if (counter->core.system_wide) @@ 
-2127,13 +2129,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp) } for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { + perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { struct perf_counts_values *counts; - counts = perf_counts(counter->counts, cpu, thread); + counts = perf_counts(counter->counts, idx, thread); printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", - counter->core.cpus->map[cpu], + cpu.cpu, perf_thread_map__pid(counter->core.threads, thread), counts->val, counts->ena, @@ -2316,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, * on events sample_type. */ sample_type = evlist__combined_sample_type(evlist); - callchain_param_setup(sample_type); + callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env)); /* Enable fields for callchain entries */ if (symbol_conf.use_callchain && @@ -3466,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script) struct perf_session *session = script->session; u64 sample_type = evlist__combined_sample_type(session->evlist); - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { - if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) { - callchain_param.record_mode = CALLCHAIN_DWARF; - dwarf_callchain_users = true; - } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) - callchain_param.record_mode = CALLCHAIN_LBR; - else - callchain_param.record_mode = CALLCHAIN_FP; - } + callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env)); if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) { pr_warning("Can't find LBR callchain. 
Switch off --stitch-lbr.\n" diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7974933dbc77..973ade18b72a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -234,7 +234,7 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b) return false; for (int i = 0; i < a->core.cpus->nr; i++) { - if (a->core.cpus->map[i] != b->core.cpus->map[i]) + if (a->core.cpus->map[i].cpu != b->core.cpus->map[i].cpu) return false; } @@ -327,34 +327,35 @@ static int write_stat_round_event(u64 tm, u64 type) #define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y) -static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread, +static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread, struct perf_counts_values *count) { - struct perf_sample_id *sid = SID(counter, cpu, thread); + struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread); + struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx); return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, process_synthesized_event, NULL); } -static int read_single_counter(struct evsel *counter, int cpu, +static int read_single_counter(struct evsel *counter, int cpu_map_idx, int thread, struct timespec *rs) { if (counter->tool_event == PERF_TOOL_DURATION_TIME) { u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL; struct perf_counts_values *count = - perf_counts(counter->counts, cpu, thread); + perf_counts(counter->counts, cpu_map_idx, thread); count->ena = count->run = val; count->val = val; return 0; } - return evsel__read_counter(counter, cpu, thread); + return evsel__read_counter(counter, cpu_map_idx, thread); } /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode */ -static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) +static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx) { int nthreads = 
perf_thread_map__nr(evsel_list->core.threads); int thread; @@ -368,24 +369,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) for (thread = 0; thread < nthreads; thread++) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu, thread); + count = perf_counts(counter->counts, cpu_map_idx, thread); /* * The leader's group read loads data into its group members * (via evsel__read_counter()) and sets their count->loaded. */ - if (!perf_counts__is_loaded(counter->counts, cpu, thread) && - read_single_counter(counter, cpu, thread, rs)) { + if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) && + read_single_counter(counter, cpu_map_idx, thread, rs)) { counter->counts->scaled = -1; - perf_counts(counter->counts, cpu, thread)->ena = 0; - perf_counts(counter->counts, cpu, thread)->run = 0; + perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0; + perf_counts(counter->counts, cpu_map_idx, thread)->run = 0; return -1; } - perf_counts__set_loaded(counter->counts, cpu, thread, false); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false); if (STAT_RECORD) { - if (evsel__write_stat_event(counter, cpu, thread, count)) { + if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) { pr_err("failed to write stat event\n"); return -1; } @@ -395,7 +396,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) fprintf(stat_config.output, "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", evsel__name(counter), - cpu, + perf_cpu_map__cpu(evsel__cpus(counter), + cpu_map_idx).cpu, count->val, count->ena, count->run); } } @@ -405,36 +407,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu) static int read_affinity_counters(struct timespec *rs) { - struct evsel *counter; - struct affinity affinity; - int i, ncpus, cpu; + struct evlist_cpu_iterator evlist_cpu_itr; + struct affinity saved_affinity, *affinity; if 
(all_counters_use_bpf) return 0; - if (affinity__setup(&affinity) < 0) + if (!target__has_cpu(&target) || target__has_per_thread(&target)) + affinity = NULL; + else if (affinity__setup(&saved_affinity) < 0) return -1; + else + affinity = &saved_affinity; - ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus); - if (!target__has_cpu(&target) || target__has_per_thread(&target)) - ncpus = 1; - evlist__for_each_cpu(evsel_list, i, cpu) { - if (i >= ncpus) - break; - affinity__set(&affinity, cpu); + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { + struct evsel *counter = evlist_cpu_itr.evsel; - evlist__for_each_entry(evsel_list, counter) { - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (evsel__is_bpf(counter)) - continue; - if (!counter->err) { - counter->err = read_counter_cpu(counter, rs, - counter->cpu_iter - 1); - } + if (evsel__is_bpf(counter)) + continue; + + if (!counter->err) { + counter->err = read_counter_cpu(counter, rs, + evlist_cpu_itr.cpu_map_idx); } } - affinity__cleanup(&affinity); + if (affinity) + affinity__cleanup(&saved_affinity); + return 0; } @@ -788,8 +787,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) int status = 0; const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int i, cpu, err; + int err; bool second_pass = false; if (forks) { @@ -813,102 +813,97 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) all_counters_use_bpf = false; } - evlist__for_each_cpu (evsel_list, i, cpu) { + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + counter = evlist_cpu_itr.evsel; + /* * bperf calls evsel__open_per_cpu() in bperf__load(), so * no need to call it again here. 
*/ if (target.use_bpf) break; - affinity__set(&affinity, cpu); - evlist__for_each_entry(evsel_list, counter) { - if (evsel__cpu_iter_skip(counter, cpu)) + if (counter->reset_group || counter->errored) + continue; + if (evsel__is_bpf(counter)) + continue; +try_again: + if (create_perf_stat_counter(counter, &stat_config, &target, + evlist_cpu_itr.cpu_map_idx) < 0) { + + /* + * Weak group failed. We cannot just undo this here + * because earlier CPUs might be in group mode, and the kernel + * doesn't support mixing group and non group reads. Defer + * it to later. + * Don't close here because we're in the wrong affinity. + */ + if ((errno == EINVAL || errno == EBADF) && + evsel__leader(counter) != counter && + counter->weak_group) { + evlist__reset_weak_group(evsel_list, counter, false); + assert(counter->reset_group); + second_pass = true; continue; - if (counter->reset_group || counter->errored) + } + + switch (stat_handle_error(counter)) { + case COUNTER_FATAL: + return -1; + case COUNTER_RETRY: + goto try_again; + case COUNTER_SKIP: continue; - if (evsel__is_bpf(counter)) + default: + break; + } + + } + counter->supported = true; + } + + if (second_pass) { + /* + * Now redo all the weak group after closing them, + * and also close errored counters. + */ + + /* First close errored or weak retry */ + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + counter = evlist_cpu_itr.evsel; + + if (!counter->reset_group && !counter->errored) continue; -try_again: + + perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); + } + /* Now reopen weak */ + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + counter = evlist_cpu_itr.evsel; + + if (!counter->reset_group && !counter->errored) + continue; + if (!counter->reset_group) + continue; +try_again_reset: + pr_debug2("reopening weak %s\n", evsel__name(counter)); if (create_perf_stat_counter(counter, &stat_config, &target, - counter->cpu_iter - 1) < 0) { - - /* - * Weak group failed. 
We cannot just undo this here - * because earlier CPUs might be in group mode, and the kernel - * doesn't support mixing group and non group reads. Defer - * it to later. - * Don't close here because we're in the wrong affinity. - */ - if ((errno == EINVAL || errno == EBADF) && - evsel__leader(counter) != counter && - counter->weak_group) { - evlist__reset_weak_group(evsel_list, counter, false); - assert(counter->reset_group); - second_pass = true; - continue; - } + evlist_cpu_itr.cpu_map_idx) < 0) { switch (stat_handle_error(counter)) { case COUNTER_FATAL: return -1; case COUNTER_RETRY: - goto try_again; + goto try_again_reset; case COUNTER_SKIP: continue; default: break; } - } counter->supported = true; } } - - if (second_pass) { - /* - * Now redo all the weak group after closing them, - * and also close errored counters. - */ - - evlist__for_each_cpu(evsel_list, i, cpu) { - affinity__set(&affinity, cpu); - /* First close errored or weak retry */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->reset_group && !counter->errored) - continue; - if (evsel__cpu_iter_skip_no_inc(counter, cpu)) - continue; - perf_evsel__close_cpu(&counter->core, counter->cpu_iter); - } - /* Now reopen weak */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->reset_group && !counter->errored) - continue; - if (evsel__cpu_iter_skip(counter, cpu)) - continue; - if (!counter->reset_group) - continue; -try_again_reset: - pr_debug2("reopening weak %s\n", evsel__name(counter)); - if (create_perf_stat_counter(counter, &stat_config, &target, - counter->cpu_iter - 1) < 0) { - - switch (stat_handle_error(counter)) { - case COUNTER_FATAL: - return -1; - case COUNTER_RETRY: - goto try_again_reset; - case COUNTER_SKIP: - continue; - default: - break; - } - } - counter->supported = true; - } - } - } affinity__cleanup(&affinity); evlist__for_each_entry(evsel_list, counter) { @@ -1168,6 +1163,26 @@ static int parse_stat_cgroups(const struct option *opt, return 
parse_cgroups(opt, str, unset); } +static int parse_hybrid_type(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + struct evlist *evlist = *(struct evlist **)opt->value; + + if (!list_empty(&evlist->core.entries)) { + fprintf(stderr, "Must define cputype before events/metrics\n"); + return -1; + } + + evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str); + if (!evlist->hybrid_pmu_name) { + fprintf(stderr, "--cputype %s is not supported!\n", str); + return -1; + } + + return 0; +} + static struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1282,6 +1297,10 @@ static struct option stat_options[] = { "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), + OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", + "Only enable events on applying cpu with this type " + "for hybrid platform (e.g. core or atom)", + parse_hybrid_type), #ifdef HAVE_LIBPFM OPT_CALLBACK(0, "pfm-events", &evsel_list, "event", "libpfm4 event selector. 
use 'perf list' to list available events", @@ -1298,70 +1317,75 @@ static struct option stat_options[] = { OPT_END() }; +static const char *const aggr_mode__string[] = { + [AGGR_CORE] = "core", + [AGGR_DIE] = "die", + [AGGR_GLOBAL] = "global", + [AGGR_NODE] = "node", + [AGGR_NONE] = "none", + [AGGR_SOCKET] = "socket", + [AGGR_THREAD] = "thread", + [AGGR_UNSET] = "unset", +}; + static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_socket(map, cpu, NULL); + return aggr_cpu_id__socket(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_die(map, cpu, NULL); + return aggr_cpu_id__die(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_core(map, cpu, NULL); + return aggr_cpu_id__core(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int cpu) + struct perf_cpu cpu) { - return cpu_map__get_node(map, cpu, NULL); + return aggr_cpu_id__node(cpu, /*data=*/NULL); } static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, - aggr_get_id_t get_id, struct perf_cpu_map *map, int idx) + aggr_get_id_t get_id, struct perf_cpu cpu) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (idx >= map->nr) - return id; + if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) + config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); - cpu = map->map[idx]; - - if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu])) - config->cpus_aggr_map->map[cpu] = get_id(config, map, 
idx); - - id = config->cpus_aggr_map->map[cpu]; + id = config->cpus_aggr_map->map[cpu.cpu]; return id; } static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_socket, cpu); } static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_die, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_die, cpu); } static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_core, cpu); } static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_stat__get_aggr(config, perf_stat__get_node, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } static bool term_percore_set(void) @@ -1376,54 +1400,67 @@ static bool term_percore_set(void) return false; } -static int perf_stat_init_aggr_mode(void) +static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) { - int nr; + switch (aggr_mode) { + case AGGR_SOCKET: + return aggr_cpu_id__socket; + case AGGR_DIE: + return aggr_cpu_id__die; + case AGGR_CORE: + return aggr_cpu_id__core; + case AGGR_NODE: + return aggr_cpu_id__node; + case AGGR_NONE: + if (term_percore_set()) + return aggr_cpu_id__core; + + return NULL; + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + return NULL; + } +} - switch (stat_config.aggr_mode) { +static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) +{ + switch (aggr_mode) { case 
AGGR_SOCKET: - if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build socket map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_socket_cached; - break; + return perf_stat__get_socket_cached; case AGGR_DIE: - if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build die map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_die_cached; - break; + return perf_stat__get_die_cached; case AGGR_CORE: - if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_cached; - break; + return perf_stat__get_core_cached; case AGGR_NODE: - if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_node_cached; - break; + return perf_stat__get_node_cached; case AGGR_NONE: if (term_percore_set()) { - if (cpu_map__build_core_map(evsel_list->core.cpus, - &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_cached; + return perf_stat__get_core_cached; } - break; + return NULL; case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: default: - break; + return NULL; + } +} + +static int perf_stat_init_aggr_mode(void) +{ + int nr; + aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); + + if (get_id) { + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, + get_id, /*data=*/NULL); + if (!stat_config.aggr_map) { + pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + return -1; + } + stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } /* @@ -1431,7 +1468,7 @@ static int perf_stat_init_aggr_mode(void) * taking the highest cpu number to be the size of * the aggregation translate cpumap. 
*/ - nr = perf_cpu_map__max(evsel_list->core.cpus); + nr = perf_cpu_map__max(evsel_list->core.cpus).cpu; stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1); return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } @@ -1459,169 +1496,139 @@ static void perf_stat__exit_aggr_mode(void) stat_config.cpus_aggr_map = NULL; } -static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx) -{ - int cpu; - - if (idx > map->nr) - return -1; - - cpu = map->map[idx]; - - if (cpu >= env->nr_cpus_avail) - return -1; - - return cpu; -} - -static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; - int cpu = perf_env__get_cpu(env, map, idx); - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) - id.socket = env->cpu[cpu].socket_id; + if (cpu.cpu != -1) + id.socket = env->cpu[cpu.cpu].socket_id; return id; } -static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) { + if (cpu.cpu != -1) { /* * die_id is relative to socket, so start * with the socket ID and then add die to * make a unique ID. 
*/ - id.socket = env->cpu[cpu].socket_id; - id.die = env->cpu[cpu].die_id; + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; } return id; } -static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct perf_env *env = data; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - int cpu = perf_env__get_cpu(env, map, idx); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (cpu != -1) { + if (cpu.cpu != -1) { /* * core_id is relative to socket and die, * we need a global id. So we set * socket, die id and core id */ - id.socket = env->cpu[cpu].socket_id; - id.die = env->cpu[cpu].die_id; - id.core = env->cpu[cpu].core_id; + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; } return id; } -static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data) +static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { - int cpu = perf_env__get_cpu(data, map, idx); - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); id.node = perf_env__numa_node(data, cpu); return id; } -static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **sockp) -{ - return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); -} - -static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **diep) -{ - return cpu_map__build_map(cpus, diep, perf_env__get_die, env); -} - -static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct cpu_aggr_map **corep) -{ - return cpu_map__build_map(cpus, corep, perf_env__get_core, env); -} - -static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus, - struct 
cpu_aggr_map **nodep) -{ - return cpu_map__build_map(cpus, nodep, perf_env__get_node, env); -} - static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_socket(map, idx, &perf_stat.session->header.env); + return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_die(map, idx, &perf_stat.session->header.env); + return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_core(map, idx, &perf_stat.session->header.env); + return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, - struct perf_cpu_map *map, int idx) + struct perf_cpu cpu) { - return perf_env__get_node(map, idx, &perf_stat.session->header.env); + return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } -static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { - struct perf_env *env = &st->session->header.env; + switch (aggr_mode) { + case AGGR_SOCKET: + return perf_env__get_socket_aggr_by_cpu; + case AGGR_DIE: + return perf_env__get_die_aggr_by_cpu; + case AGGR_CORE: + return perf_env__get_core_aggr_by_cpu; + case AGGR_NODE: + return perf_env__get_node_aggr_by_cpu; + case AGGR_NONE: + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + return NULL; + } +} - switch (stat_config.aggr_mode) { +static aggr_get_id_t aggr_mode__get_id_file(enum 
aggr_mode aggr_mode) +{ + switch (aggr_mode) { case AGGR_SOCKET: - if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build socket map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_socket_file; - break; + return perf_stat__get_socket_file; case AGGR_DIE: - if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build die map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_die_file; - break; + return perf_stat__get_die_file; case AGGR_CORE: - if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_core_file; - break; + return perf_stat__get_core_file; case AGGR_NODE: - if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) { - perror("cannot build core map"); - return -1; - } - stat_config.aggr_get_id = perf_stat__get_node_file; - break; + return perf_stat__get_node_file; case AGGR_NONE: case AGGR_GLOBAL: case AGGR_THREAD: case AGGR_UNSET: default: - break; + return NULL; } +} + +static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +{ + struct perf_env *env = &st->session->header.env; + aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + + if (!get_id) + return 0; + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env); + if (!stat_config.aggr_map) { + pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); + return -1; + } + stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode); return 0; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ef94388e8323..32844d8a0ea5 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2726,6 +2726,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, offset = format_field__intval(field, 
sample, evsel->needs_swap); syscall_arg.len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } val = (uintptr_t)(sample->raw_data + offset); @@ -3257,10 +3259,21 @@ static void trace__set_bpf_map_syscalls(struct trace *trace) static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { + struct bpf_program *pos, *prog = NULL; + const char *sec_name; + if (trace->bpf_obj == NULL) return NULL; - return bpf_object__find_program_by_title(trace->bpf_obj, name); + bpf_object__for_each_program(pos, trace->bpf_obj) { + sec_name = bpf_program__section_name(pos); + if (sec_name && !strcmp(sec_name, name)) { + prog = pos; + break; + } + } + + return prog; } static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc, @@ -3951,6 +3964,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__add(evlist, pgfault_min); } + /* Enable ignoring missing threads when -u/-p option is defined. 
*/ + trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid; + if (trace->sched && evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime)) goto out_error_sched_stat_runtime; diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 7565a1852c74..b17eb52a0694 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) { - struct filter_data *d = data; - pr_debug("%s API\n", __func__); return do_checks(data, sample, ctx, false); diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json new file mode 100644 index 000000000000..79f2016c53b0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json new file mode 100644 index 000000000000..579c1c993d17 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json new file mode 100644 index 000000000000..0141f749bff3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json @@ -0,0 +1,155 @@ +[ + { + "ArchStdEvent": 
"L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L3D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L3D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L3D_CACHE" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "LL_CACHE_RD" + }, + { + "ArchStdEvent": "LL_CACHE_MISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": 
"L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L3D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json new file mode 100644 index 000000000000..344a2d552ad5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json new file mode 100644 index 000000000000..e57cd55937c6 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json @@ -0,0 +1,143 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": 
"STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + }, + { + "ArchStdEvent": "ASE_INST_SPEC" + }, + { + "ArchStdEvent": "SVE_INST_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_EMPTY_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_PARTIAL_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_FAULT_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT8_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT16_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT32_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT64_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json new file mode 100644 index 000000000000..e522113aeb96 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json @@ -0,0 +1,38 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS" + }, + { + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + 
}, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + }, + { + "ArchStdEvent": "LDST_ALIGN_LAT" + }, + { + "ArchStdEvent": "LD_ALIGN_LAT" + }, + { + "ArchStdEvent": "ST_ALIGN_LAT" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_CHECKED_WR" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json new file mode 100644 index 000000000000..20d8365756c5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/other.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "REMOTE_ACCESS" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json new file mode 100644 index 000000000000..f9fae15f7555 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json new file mode 100644 index 000000000000..20f2165c85fe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "SAMPLE_POP" + }, + { + "ArchStdEvent": "SAMPLE_FEED" + }, + { + "ArchStdEvent": "SAMPLE_FILTRATE" + }, + { + "ArchStdEvent": "SAMPLE_COLLISION" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json new file mode 100644 index 000000000000..3116135c59e2 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json @@ 
-0,0 +1,29 @@ +[ + { + "ArchStdEvent": "TRB_WRAP" + }, + { + "ArchStdEvent": "TRCEXTOUT0" + }, + { + "ArchStdEvent": "TRCEXTOUT1" + }, + { + "ArchStdEvent": "TRCEXTOUT2" + }, + { + "ArchStdEvent": "TRCEXTOUT3" + }, + { + "ArchStdEvent": "CTI_TRIGOUT4" + }, + { + "ArchStdEvent": "CTI_TRIGOUT5" + }, + { + "ArchStdEvent": "CTI_TRIGOUT6" + }, + { + "ArchStdEvent": "CTI_TRIGOUT7" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json index 423767510aff..80d7a70829a0 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json @@ -300,6 +300,30 @@ "BriefDescription": "No operation sent for execution on a slot" }, { + "PublicDescription": "Sample Population", + "EventCode": "0x4000", + "EventName": "SAMPLE_POP", + "BriefDescription": "Sample Population" + }, + { + "PublicDescription": "Sample Taken", + "EventCode": "0x4001", + "EventName": "SAMPLE_FEED", + "BriefDescription": "Sample Taken" + }, + { + "PublicDescription": "Sample Taken and not removed by filtering", + "EventCode": "0x4002", + "EventName": "SAMPLE_FILTRATE", + "BriefDescription": "Sample Taken and not removed by filtering" + }, + { + "PublicDescription": "Sample collided with previous sample", + "EventCode": "0x4003", + "EventName": "SAMPLE_COLLISION", + "BriefDescription": "Sample collided with previous sample" + }, + { "PublicDescription": "Constant frequency cycles. 
The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.", "EventCode": "0x4004", "EventName": "CNT_CYCLES", @@ -330,6 +354,96 @@ "BriefDescription": "Level 3 data cache long-latency read miss" }, { + "PublicDescription": "Trace buffer current write pointer wrapped", + "EventCode": "0x400C", + "EventName": "TRB_WRAP", + "BriefDescription": "Trace buffer current write pointer wrapped" + }, + { + "PublicDescription": "PE Trace Unit external output 0", + "EventCode": "0x4010", + "EventName": "TRCEXTOUT0", + "BriefDescription": "PE Trace Unit external output 0" + }, + { + "PublicDescription": "PE Trace Unit external output 1", + "EventCode": "0x4011", + "EventName": "TRCEXTOUT1", + "BriefDescription": "PE Trace Unit external output 1" + }, + { + "PublicDescription": "PE Trace Unit external output 2", + "EventCode": "0x4012", + "EventName": "TRCEXTOUT2", + "BriefDescription": "PE Trace Unit external output 2" + }, + { + "PublicDescription": "PE Trace Unit external output 3", + "EventCode": "0x4013", + "EventName": "TRCEXTOUT3", + "BriefDescription": "PE Trace Unit external output 3" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 4", + "EventCode": "0x4018", + "EventName": "CTI_TRIGOUT4", + "BriefDescription": "Cross-trigger Interface output trigger 4" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 5", + "EventCode": "0x4019", + "EventName": "CTI_TRIGOUT5", + "BriefDescription": "Cross-trigger Interface output trigger 5" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 6", + "EventCode": "0x401A", + "EventName": "CTI_TRIGOUT6", + "BriefDescription": "Cross-trigger Interface output trigger 6" + }, + { + "PublicDescription": "Cross-trigger Interface output trigger 7", + "EventCode": "0x401B", + "EventName": "CTI_TRIGOUT7", + "BriefDescription": "Cross-trigger Interface output trigger 7" + }, + { + "PublicDescription": "Access with additional 
latency from alignment", + "EventCode": "0x4020", + "EventName": "LDST_ALIGN_LAT", + "BriefDescription": "Access with additional latency from alignment" + }, + { + "PublicDescription": "Load with additional latency from alignment", + "EventCode": "0x4021", + "EventName": "LD_ALIGN_LAT", + "BriefDescription": "Load with additional latency from alignment" + }, + { + "PublicDescription": "Store with additional latency from alignment", + "EventCode": "0x4022", + "EventName": "ST_ALIGN_LAT", + "BriefDescription": "Store with additional latency from alignment" + }, + { + "PublicDescription": "Checked data memory access", + "EventCode": "0x4024", + "EventName": "MEM_ACCESS_CHECKED", + "BriefDescription": "Checked data memory access" + }, + { + "PublicDescription": "Checked data memory access, read", + "EventCode": "0x4025", + "EventName": "MEM_ACCESS_CHECKED_RD", + "BriefDescription": "Checked data memory access, read" + }, + { + "PublicDescription": "Checked data memory access, write", + "EventCode": "0x4026", + "EventName": "MEM_ACCESS_CHECKED_WR", + "BriefDescription": "Checked data memory access, write" + }, + { "PublicDescription": "SIMD Instruction architecturally executed.", "EventCode": "0x8000", "EventName": "SIMD_INST_RETIRED", @@ -342,6 +456,18 @@ "BriefDescription": "Instruction architecturally executed, SVE." }, { + "PublicDescription": "ASE operations speculatively executed", + "EventCode": "0x8005", + "EventName": "ASE_INST_SPEC", + "BriefDescription": "ASE operations speculatively executed" + }, + { + "PublicDescription": "SVE operations speculatively executed", + "EventCode": "0x8006", + "EventName": "SVE_INST_SPEC", + "BriefDescription": "SVE operations speculatively executed" + }, + { "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", "EventCode": "0x8008", "EventName": "UOP_SPEC", @@ -360,6 +486,24 @@ "BriefDescription": "Floating-point Operations speculatively executed." 
}, { + "PublicDescription": "Floating-point half-precision operations speculatively executed", + "EventCode": "0x8014", + "EventName": "FP_HP_SPEC", + "BriefDescription": "Floating-point half-precision operations speculatively executed" + }, + { + "PublicDescription": "Floating-point single-precision operations speculatively executed", + "EventCode": "0x8018", + "EventName": "FP_SP_SPEC", + "BriefDescription": "Floating-point single-precision operations speculatively executed" + }, + { + "PublicDescription": "Floating-point double-precision operations speculatively executed", + "EventCode": "0x801C", + "EventName": "FP_DP_SPEC", + "BriefDescription": "Floating-point double-precision operations speculatively executed" + }, + { "PublicDescription": "Floating-point FMA Operations speculatively executed.", "EventCode": "0x8028", "EventName": "FP_FMA_SPEC", @@ -390,6 +534,30 @@ "BriefDescription": "SVE predicated Operations speculatively executed." }, { + "PublicDescription": "SVE predicated operations with no active predicates speculatively executed", + "EventCode": "0x8075", + "EventName": "SVE_PRED_EMPTY_SPEC", + "BriefDescription": "SVE predicated operations with no active predicates speculatively executed" + }, + { + "PublicDescription": "SVE predicated operations speculatively executed with all active predicates", + "EventCode": "0x8076", + "EventName": "SVE_PRED_FULL_SPEC", + "BriefDescription": "SVE predicated operations speculatively executed with all active predicates" + }, + { + "PublicDescription": "SVE predicated operations speculatively executed with partially active predicates", + "EventCode": "0x8077", + "EventName": "SVE_PRED_PARTIAL_SPEC", + "BriefDescription": "SVE predicated operations speculatively executed with partially active predicates" + }, + { + "PublicDescription": "SVE predicated operations with empty or partially active predicates", + "EventCode": "0x8079", + "EventName": "SVE_PRED_NOT_FULL_SPEC", + "BriefDescription": "SVE predicated 
operations with empty or partially active predicates" + }, + { "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", "EventCode": "0x807C", "EventName": "SVE_MOVPRFX_SPEC", @@ -498,6 +666,12 @@ "BriefDescription": "SVE First-fault load Operations speculatively executed." }, { + "PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0", + "EventCode": "0x80BD", + "EventName": "SVE_LDFF_FAULT_SPEC", + "BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0" + }, + { "PublicDescription": "Scalable floating-point element Operations speculatively executed.", "EventCode": "0x80C0", "EventName": "FP_SCALE_OPS_SPEC", @@ -544,5 +718,29 @@ "EventCode": "0x80C7", "EventName": "FP_DP_FIXED_OPS_SPEC", "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed", + "EventCode": "0x80E3", + "EventName": "ASE_SVE_INT8_SPEC", + "BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed", + "EventCode": "0x80E7", + "EventName": "ASE_SVE_INT16_SPEC", + "BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed", + "EventCode": "0x80EB", + "EventName": "ASE_SVE_INT32_SPEC", + "BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed" + }, + { + "PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed", + "EventCode": "0x80EF", + "EventName": "ASE_SVE_INT64_SPEC", + "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed" } ] diff --git 
a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 31d8b57ca9bb..b899db48c12a 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -19,6 +19,7 @@ 0x00000000410fd0b0,v1,arm/cortex-a76-n1,core 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000410fd400,v1,arm/neoverse-v1,core +0x00000000410fd490,v1,arm/neoverse-n2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json index d0a19866563d..210afa856091 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json +++ b/tools/perf/pmu-events/arch/arm64/recommended.json @@ -148,305 +148,305 @@ "EventCode": "0x60", "EventName": "BUS_ACCESS_RD", "BriefDescription": "Bus access read" - }, - { + }, + { "PublicDescription": "Bus access write", "EventCode": "0x61", "EventName": "BUS_ACCESS_WR", "BriefDescription": "Bus access write" - }, - { + }, + { "PublicDescription": "Bus access, Normal, Cacheable, Shareable", "EventCode": "0x62", "EventName": "BUS_ACCESS_SHARED", "BriefDescription": "Bus access, Normal, Cacheable, Shareable" - }, - { + }, + { "PublicDescription": "Bus access, not Normal, Cacheable, Shareable", "EventCode": "0x63", "EventName": "BUS_ACCESS_NOT_SHARED", "BriefDescription": "Bus access, not Normal, Cacheable, Shareable" - }, - { + }, + { "PublicDescription": "Bus access, Normal", "EventCode": "0x64", "EventName": "BUS_ACCESS_NORMAL", "BriefDescription": "Bus access, Normal" - }, - { + }, + { "PublicDescription": "Bus access, peripheral", "EventCode": "0x65", "EventName": "BUS_ACCESS_PERIPH", "BriefDescription": "Bus access, peripheral" - }, - { + }, + { "PublicDescription": "Data memory access, read", "EventCode": "0x66", "EventName": "MEM_ACCESS_RD", "BriefDescription": "Data memory access, 
read" - }, - { + }, + { "PublicDescription": "Data memory access, write", "EventCode": "0x67", "EventName": "MEM_ACCESS_WR", "BriefDescription": "Data memory access, write" - }, - { + }, + { "PublicDescription": "Unaligned access, read", "EventCode": "0x68", "EventName": "UNALIGNED_LD_SPEC", "BriefDescription": "Unaligned access, read" - }, - { + }, + { "PublicDescription": "Unaligned access, write", "EventCode": "0x69", "EventName": "UNALIGNED_ST_SPEC", "BriefDescription": "Unaligned access, write" - }, - { + }, + { "PublicDescription": "Unaligned access", "EventCode": "0x6a", "EventName": "UNALIGNED_LDST_SPEC", "BriefDescription": "Unaligned access" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX", "EventCode": "0x6c", "EventName": "LDREX_SPEC", "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass", "EventCode": "0x6d", "EventName": "STREX_PASS_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail", "EventCode": "0x6e", "EventName": "STREX_FAIL_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail" - }, - { + }, + { "PublicDescription": "Exclusive operation speculatively executed, STREX or STX", "EventCode": "0x6f", "EventName": "STREX_SPEC", "BriefDescription": "Exclusive operation speculatively executed, STREX or STX" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, load", "EventCode": "0x70", "EventName": "LD_SPEC", "BriefDescription": "Operation speculatively executed, load" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, store", "EventCode": "0x71", "EventName": "ST_SPEC", "BriefDescription": "Operation speculatively executed, store" - }, - { + }, + { 
"PublicDescription": "Operation speculatively executed, load or store", "EventCode": "0x72", "EventName": "LDST_SPEC", "BriefDescription": "Operation speculatively executed, load or store" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, integer data processing", "EventCode": "0x73", "EventName": "DP_SPEC", "BriefDescription": "Operation speculatively executed, integer data processing" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction", "EventCode": "0x74", "EventName": "ASE_SPEC", "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, floating-point instruction", "EventCode": "0x75", "EventName": "VFP_SPEC", "BriefDescription": "Operation speculatively executed, floating-point instruction" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, software change of the PC", "EventCode": "0x76", "EventName": "PC_WRITE_SPEC", "BriefDescription": "Operation speculatively executed, software change of the PC" - }, - { + }, + { "PublicDescription": "Operation speculatively executed, Cryptographic instruction", "EventCode": "0x77", "EventName": "CRYPTO_SPEC", "BriefDescription": "Operation speculatively executed, Cryptographic instruction" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, immediate branch", "EventCode": "0x78", "EventName": "BR_IMMED_SPEC", "BriefDescription": "Branch speculatively executed, immediate branch" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, procedure return", "EventCode": "0x79", "EventName": "BR_RETURN_SPEC", "BriefDescription": "Branch speculatively executed, procedure return" - }, - { + }, + { "PublicDescription": "Branch speculatively executed, indirect branch", "EventCode": "0x7a", "EventName": "BR_INDIRECT_SPEC", "BriefDescription": "Branch speculatively executed, indirect branch" - }, - { + }, + { 
"PublicDescription": "Barrier speculatively executed, ISB", "EventCode": "0x7c", "EventName": "ISB_SPEC", "BriefDescription": "Barrier speculatively executed, ISB" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, DSB", "EventCode": "0x7d", "EventName": "DSB_SPEC", "BriefDescription": "Barrier speculatively executed, DSB" - }, - { + }, + { "PublicDescription": "Barrier speculatively executed, DMB", "EventCode": "0x7e", "EventName": "DMB_SPEC", "BriefDescription": "Barrier speculatively executed, DMB" - }, - { + }, + { "PublicDescription": "Exception taken, Other synchronous", "EventCode": "0x81", "EventName": "EXC_UNDEF", "BriefDescription": "Exception taken, Other synchronous" - }, - { + }, + { "PublicDescription": "Exception taken, Supervisor Call", "EventCode": "0x82", "EventName": "EXC_SVC", "BriefDescription": "Exception taken, Supervisor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Instruction Abort", "EventCode": "0x83", "EventName": "EXC_PABORT", "BriefDescription": "Exception taken, Instruction Abort" - }, - { + }, + { "PublicDescription": "Exception taken, Data Abort and SError", "EventCode": "0x84", "EventName": "EXC_DABORT", "BriefDescription": "Exception taken, Data Abort and SError" - }, - { + }, + { "PublicDescription": "Exception taken, IRQ", "EventCode": "0x86", "EventName": "EXC_IRQ", "BriefDescription": "Exception taken, IRQ" - }, - { + }, + { "PublicDescription": "Exception taken, FIQ", "EventCode": "0x87", "EventName": "EXC_FIQ", "BriefDescription": "Exception taken, FIQ" - }, - { + }, + { "PublicDescription": "Exception taken, Secure Monitor Call", "EventCode": "0x88", "EventName": "EXC_SMC", "BriefDescription": "Exception taken, Secure Monitor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Hypervisor Call", "EventCode": "0x8a", "EventName": "EXC_HVC", "BriefDescription": "Exception taken, Hypervisor Call" - }, - { + }, + { "PublicDescription": "Exception taken, Instruction Abort not 
taken locally", "EventCode": "0x8b", "EventName": "EXC_TRAP_PABORT", "BriefDescription": "Exception taken, Instruction Abort not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, Data Abort or SError not taken locally", "EventCode": "0x8c", "EventName": "EXC_TRAP_DABORT", "BriefDescription": "Exception taken, Data Abort or SError not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, Other traps not taken locally", "EventCode": "0x8d", "EventName": "EXC_TRAP_OTHER", "BriefDescription": "Exception taken, Other traps not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, IRQ not taken locally", "EventCode": "0x8e", "EventName": "EXC_TRAP_IRQ", "BriefDescription": "Exception taken, IRQ not taken locally" - }, - { + }, + { "PublicDescription": "Exception taken, FIQ not taken locally", "EventCode": "0x8f", "EventName": "EXC_TRAP_FIQ", "BriefDescription": "Exception taken, FIQ not taken locally" - }, - { + }, + { "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire", "EventCode": "0x90", "EventName": "RC_LD_SPEC", "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire" - }, - { + }, + { "PublicDescription": "Release consistency operation speculatively executed, Store-Release", "EventCode": "0x91", "EventName": "RC_ST_SPEC", "BriefDescription": "Release consistency operation speculatively executed, Store-Release" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, read", "EventCode": "0xa0", "EventName": "L3D_CACHE_RD", "BriefDescription": "Attributable Level 3 data or unified cache access, read" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, write", "EventCode": "0xa1", "EventName": "L3D_CACHE_WR", "BriefDescription": "Attributable Level 3 data or unified cache access, write" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache 
refill, read", "EventCode": "0xa2", "EventName": "L3D_CACHE_REFILL_RD", "BriefDescription": "Attributable Level 3 data or unified cache refill, read" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache refill, write", "EventCode": "0xa3", "EventName": "L3D_CACHE_REFILL_WR", "BriefDescription": "Attributable Level 3 data or unified cache refill, write" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim", "EventCode": "0xa6", "EventName": "L3D_CACHE_WB_VICTIM", "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean", "EventCode": "0xa7", "EventName": "L3D_CACHE_WB_CLEAN", "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean" - }, - { + }, + { "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate", "EventCode": "0xa8", "EventName": "L3D_CACHE_INVAL", "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate" - } + } ] diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 2e7c4153875b..1a57c3f81dd4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -672,8 +672,6 @@ static int json_events(const char *fn, addfield(map, &je.metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &je.metric_expr, "", "", val); - for (s = je.metric_expr; *s; s++) - *s = tolower(*s); } else if (json_streq(map, field, "ArchStdEvent")) { addfield(map, &arch_std, "", "", val); for (s = arch_std; *s; s++) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 803ca426f8e6..af2b37ef7c70 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o perf-y += expand-cgroup.o perf-y += perf-time-to-tsc.o perf-y += dlfilter-test.o 
+perf-y += sigtrap.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0f73e300f207..56fba08a3037 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -65,7 +65,7 @@ do { \ #define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) -static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { FILE *file; @@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, /* syscall arguments */ __WRITE_ASS(fd, "d", fd); __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu); + __WRITE_ASS(cpu, "d", cpu.cpu); __WRITE_ASS(pid, "d", pid); __WRITE_ASS(flags, "lu", flags); @@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, return 0; } -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { int errno_saved = errno; diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 384856347236..0bf399c49849 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < map->nr; i++) - set_bit(map->map[i], bm); + set_bit(map->map[i].cpu, bm); } if (map) diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 329f77f592f4..573490530194 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -296,9 +296,13 @@ static int check_env(void) return err; } +/* temporarily disable libbpf deprecation warnings */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" err = bpf_load_program(BPF_PROG_TYPE_KPROBE, 
insns, ARRAY_SIZE(insns), license, kver_int, NULL, 0); +#pragma GCC diagnostic pop if (err < 0) { pr_err("Missing basic BPF support, skip this test: %s\n", strerror(errno)); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 8cb5a1c3489e..fac3717d9ba1 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = { &suite__expand_cgroup_events, &suite__perf_time_to_tsc, &suite__dlfilter, + &suite__sigtrap, NULL, }; @@ -420,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width, continue; st.file = ent->d_name; - pr_info("%2d: %-*s:", i, width, test_suite.desc); + pr_info("%3d: %-*s:", i, width, test_suite.desc); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -470,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) continue; } - pr_info("%2d: %-*s:", i, width, test_description(t, -1)); + pr_info("%3d: %-*s:", i, width, test_description(t, -1)); if (intlist__find(skiplist, i)) { color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); @@ -510,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) curr, argc, argv)) continue; - pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, + pr_info("%3d.%1d: %-*s:", i, subi + 1, subw, test_description(t, subi)); test_and_print(t, subi); } @@ -545,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!perf_test__matches(t.desc, curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, t.desc); + pr_info("%3d: %s\n", i, t.desc); } @@ -567,14 +568,14 @@ static int perf_test__list(int argc, const char **argv) if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) continue; - pr_info("%2d: %s\n", i, test_description(t, -1)); + pr_info("%3d: %s\n", i, test_description(t, -1)); if (has_subtests(t)) { int subn = 
num_subtests(t); int subi; for (subi = 0; subi < subn; subi++) - pr_info("%2d:%1d: %s\n", i, subi + 1, + pr_info("%3d:%1d: %s\n", i, subi + 1, test_description(t, subi)); } } @@ -606,6 +607,9 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 89a155092f85..84e87e31f119 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", map->nr == 20); for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", map->map[i] == i); + TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i); } perf_cpu_map__put(map); @@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", map->nr == 2); - TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); + TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256); TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); perf_cpu_map__put(map); return 0; diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index d01532d40acb..16b6d6f47f38 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -76,9 +76,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); TEST_ASSERT_VAL("wrong cpus", map->nr == 3); - TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1); - TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2); - 
TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3); + TEST_ASSERT_VAL("wrong cpus", map->map[0].cpu == 1); + TEST_ASSERT_VAL("wrong cpus", map->map[1].cpu == 2); + TEST_ASSERT_VAL("wrong cpus", map->map[2].cpu == 3); perf_cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index b17b86391383..f4a4aba33f76 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -31,7 +31,7 @@ static unsigned long *get_bitmap(const char *str, int nbits) if (map && bm) { for (i = 0; i < map->nr; i++) { - set_bit(map->map[i], bm); + set_bit(map->map[i].cpu, bm); } } diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 90b2feda31ac..0ad62914b4d7 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -59,11 +59,11 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest } CPU_ZERO(&cpu_set); - CPU_SET(cpus->map[0], &cpu_set); + CPU_SET(cpus->map[0].cpu, &cpu_set); sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf))); + cpus->map[0].cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_free_cpus; } diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index cd3dd463783f..1ab362323d25 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -22,7 +22,8 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { - int err = -1, fd, cpu; + int err = -1, fd, idx; + struct perf_cpu cpu; struct perf_cpu_map *cpus; struct evsel *evsel; unsigned int nr_openat_calls = 111, i; @@ -58,23 +59,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb goto out_evsel_delete; } - for (cpu = 0; cpu < 
cpus->nr; ++cpu) { - unsigned int ncalls = nr_openat_calls + cpu; + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + unsigned int ncalls = nr_openat_calls + idx; /* * XXX eventually lift this restriction in a way that * keeps perf building on older glibc installations * without CPU_ALLOC. 1024 cpus in 2010 still seems * a reasonable upper limit tho :-) */ - if (cpus->map[cpu] >= CPU_SETSIZE) { - pr_debug("Ignoring CPU %d\n", cpus->map[cpu]); + if (cpu.cpu >= CPU_SETSIZE) { + pr_debug("Ignoring CPU %d\n", cpu.cpu); continue; } - CPU_SET(cpus->map[cpu], &cpu_set); + CPU_SET(cpu.cpu, &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[cpu], + cpu.cpu, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_close_fd; } @@ -82,37 +83,29 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } - CPU_CLR(cpus->map[cpu], &cpu_set); + CPU_CLR(cpu.cpu, &cpu_set); } - /* - * Here we need to explicitly preallocate the counts, as if - * we use the auto allocation it will allocate just for 1 cpu, - * as we start by cpu 0. 
- */ - if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { - pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr); - goto out_close_fd; - } + evsel->core.cpus = perf_cpu_map__get(cpus); err = 0; - for (cpu = 0; cpu < cpus->nr; ++cpu) { + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { unsigned int expected; - if (cpus->map[cpu] >= CPU_SETSIZE) + if (cpu.cpu >= CPU_SETSIZE) continue; - if (evsel__read_on_cpu(evsel, cpu, 0) < 0) { + if (evsel__read_on_cpu(evsel, idx, 0) < 0) { pr_debug("evsel__read_on_cpu\n"); err = -1; break; } - expected = nr_openat_calls + cpu; - if (perf_counts(evsel->counts, cpu, 0)->val != expected) { + expected = nr_openat_calls + idx; + if (perf_counts(evsel->counts, idx, 0)->val != expected) { pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); + expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val); err = -1; } } diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index de24d374ce24..cb35e488809a 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -6,7 +6,7 @@ set -e for m in $(perf list --raw-dump metricgroups); do echo "Testing $m" - perf stat -M "$m" true + perf stat -M "$m" -a true done exit 0 diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c new file mode 100644 index 000000000000..1f147fe6595f --- /dev/null +++ b/tools/perf/tests/sigtrap.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic test for sigtrap support. + * + * Copyright (C) 2021, Google LLC. 
+ */ + +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <linux/hw_breakpoint.h> +#include <linux/string.h> +#include <pthread.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "cloexec.h" +#include "debug.h" +#include "event.h" +#include "tests.h" +#include "../perf-sys.h" + +/* + * PowerPC and S390 do not support creation of instruction breakpoints using the + * perf_event interface. + * + * Just disable the test for these architectures until these issues are + * resolved. + */ +#if defined(__powerpc__) || defined(__s390x__) +#define BP_ACCOUNT_IS_SUPPORTED 0 +#else +#define BP_ACCOUNT_IS_SUPPORTED 1 +#endif + +#define NUM_THREADS 5 + +static struct { + int tids_want_signal; /* Which threads still want a signal. */ + int signal_count; /* Sanity check number of signals received. */ + volatile int iterate_on; /* Variable to set breakpoint on. */ + siginfo_t first_siginfo; /* First observed siginfo_t. */ +} ctx; + +#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on)) + +static struct perf_event_attr make_event_attr(void) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_BREAKPOINT, + .size = sizeof(attr), + .sample_period = 1, + .disabled = 1, + .bp_addr = (unsigned long)&ctx.iterate_on, + .bp_type = HW_BREAKPOINT_RW, + .bp_len = HW_BREAKPOINT_LEN_1, + .inherit = 1, /* Children inherit events ... */ + .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ + .remove_on_exec = 1, /* Required by sigtrap. */ + .sigtrap = 1, /* Request synchronous SIGTRAP on event. 
*/ + .sig_data = TEST_SIG_DATA, + .exclude_kernel = 1, /* To allow */ + .exclude_hv = 1, /* running as !root */ + }; + return attr; +} + +static void +sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused) +{ + if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED)) + ctx.first_siginfo = *info; + __atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED); +} + +static void *test_thread(void *arg) +{ + pthread_barrier_t *barrier = (pthread_barrier_t *)arg; + pid_t tid = syscall(SYS_gettid); + int i; + + pthread_barrier_wait(barrier); + + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + for (i = 0; i < ctx.iterate_on - 1; i++) + __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED); + + return NULL; +} + +static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier) +{ + int i; + + pthread_barrier_wait(barrier); + for (i = 0; i < NUM_THREADS; i++) + TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0); + + return TEST_OK; +} + +static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier) +{ + int ret; + + ctx.iterate_on = 3000; + + TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0); + TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0); + ret = run_test_threads(threads, barrier); + TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0); + + TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on); + TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0); + TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on); +#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */ + TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type, + PERF_TYPE_BREAKPOINT); + TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data, + 
TEST_SIG_DATA); +#endif + + return ret; +} + +static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused) +{ + struct perf_event_attr attr = make_event_attr(); + struct sigaction action = {}; + struct sigaction oldact; + pthread_t threads[NUM_THREADS]; + pthread_barrier_t barrier; + char sbuf[STRERR_BUFSIZE]; + int i, fd, ret = TEST_FAIL; + + if (!BP_ACCOUNT_IS_SUPPORTED) { + pr_debug("Test not supported on this architecture"); + return TEST_SKIP; + } + + pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1); + + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = sigtrap_handler; + sigemptyset(&action.sa_mask); + if (sigaction(SIGTRAP, &action, &oldact)) { + pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + goto out; + } + + fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); + if (fd < 0) { + pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + goto out_restore_sigaction; + } + + for (i = 0; i < NUM_THREADS; i++) { + if (pthread_create(&threads[i], NULL, test_thread, &barrier)) { + pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); + goto out_close_perf_event; + } + } + + ret = run_stress_test(fd, threads, &barrier); + +out_close_perf_event: + close(fd); +out_restore_sigaction: + sigaction(SIGTRAP, &oldact, NULL); +out: + pthread_barrier_destroy(&barrier); + return ret; +} + +DEFINE_SUITE("Sigtrap", sigtrap); diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c index 2eb096b5e6da..500974040fe3 100644 --- a/tools/perf/tests/stat.c +++ b/tools/perf/tests/stat.c @@ -87,7 +87,8 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub count.run = 300; TEST_ASSERT_VAL("failed to synthesize stat_config", - !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL)); + !perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3, + &count, 
process_stat_event, NULL)); return 0; } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8f65098110fc..5bbb8f6a48fc 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing); DECLARE_SUITE(expand_cgroup_events); DECLARE_SUITE(perf_time_to_tsc); DECLARE_SUITE(dlfilter); +DECLARE_SUITE(sigtrap); /* * PowerPC and S390 do not support creation of instruction breakpoints using the diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 869986139146..c4ef0c7002f1 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -112,62 +112,83 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu); for (i = 0; i < session->header.env.nr_cpus_avail; i++) { - if (!cpu_map__has(map, i)) + struct perf_cpu cpu = { .cpu = i }; + + if (!perf_cpu_map__has(map, cpu)) continue; pr_debug("CPU %d, core %d, socket %d\n", i, session->header.env.cpu[i].core_id, session->header.env.cpu[i].socket_id); } + // Test that CPU ID contains socket, die, core and CPU + for (i = 0; i < map->nr; i++) { + id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL); + TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i].cpu == id.cpu.cpu); + + TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", + session->header.env.cpu[map->map[i].cpu].core_id == id.core); + TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); + + TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", + session->header.env.cpu[map->map[i].cpu].die_id == id.die); + TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); + TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1); + } + // Test that core ID contains socket, die and core for (i = 0; i < map->nr; i++) { - id = cpu_map__get_core(map, i, NULL); + id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), 
NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i]].core_id == id.core); + session->header.env.cpu[map->map[i].cpu].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[map->map[i].cpu].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } // Test that die ID contains socket and die for (i = 0; i < map->nr; i++) { - id = cpu_map__get_die(map, i, NULL); + id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[map->map[i]].die_id == id.die); + session->header.env.cpu[map->map[i].cpu].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1); } // Test that socket ID contains only socket for (i = 0; i < map->nr; i++) { - id = cpu_map__get_socket(map, i, NULL); + id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i]].socket_id == id.socket); + session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1); + 
TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1); } // Test that node ID contains only node for (i = 0; i < map->nr; i++) { - id = cpu_map__get_node(map, i, NULL); + id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", cpu__get_node(map->map[i]) == id.node); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); + TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1); TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1); } perf_session__delete(session); diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index e81c2493efdf..44ba900828f6 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, .opts = opts, }; int ret = -1, err; + int not_annotated = list_empty(&notes->src->source); if (sym == NULL) return -1; @@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, if (ms->map->dso->annotate_warned) return -1; - err = symbol__annotate2(ms, evsel, opts, &browser.arch); - if (err) { - char msg[BUFSIZ]; - ms->map->dso->annotate_warned = true; - symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); - ui__error("Couldn't annotate %s:\n%s", sym->name, msg); - goto out_free_offsets; + if (not_annotated) { + err = symbol__annotate2(ms, evsel, opts, &browser.arch); + if (err) { + char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); + goto out_free_offsets; + } } ui_helpline__push("Press ESC to exit"); @@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, ret = 
annotate_browser__run(&browser, evsel, hbt); - annotated_source__purge(notes->src); + if(not_annotated) + annotated_source__purge(notes->src); out_free_offsets: - zfree(&notes->offsets); + if(not_annotated) + zfree(&notes->offsets); return ret; } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2e5bfbb69960..2a403cefcaf2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,3 +1,4 @@ +perf-y += arm64-frame-pointer-unwind-support.o perf-y += annotate.o perf-y += block-info.o perf-y += block-range.o @@ -144,6 +145,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o +perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index 7b12bd7a3080..f1e30d566db3 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -11,7 +11,7 @@ static int get_cpu_set_size(void) { - int sz = cpu__max_cpu() + 8 - 1; + int sz = cpu__max_cpu().cpu + 8 - 1; /* * sched_getaffinity doesn't like masks smaller than the kernel. * Hopefully that's big enough. 
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 3fc528c9270c..5e390a1a79ab 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.phys_addr = ip; break; case ARM_SPE_COUNTER: + if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT) + decoder->record.latency = payload; break; case ARM_SPE_CONTEXT: decoder->record.context_id = payload; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 46a8556a9e95..69b31084d6be 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -33,6 +33,7 @@ struct arm_spe_record { enum arm_spe_sample_type type; int err; u32 op; + u32 latency; u64 from_ip; u64 to_ip; u64 timestamp; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index fccac06b573a..d2b64e3f588b 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -58,6 +58,8 @@ struct arm_spe { u8 sample_branch; u8 sample_remote_access; u8 sample_memory; + u8 sample_instructions; + u64 instructions_sample_period; u64 l1d_miss_id; u64 l1d_access_id; @@ -68,6 +70,7 @@ struct arm_spe { u64 branch_miss_id; u64 remote_access_id; u64 memory_id; + u64 instructions_id; u64 kernel_start; @@ -90,6 +93,7 @@ struct arm_spe_queue { u64 time; u64 timestamp; struct thread *thread; + u64 period_instructions; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe, speq->pid = -1; speq->tid = -1; speq->cpu = -1; + speq->period_instructions = 0; /* params set */ params.get_trace = arm_spe_get_trace; @@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq, sample.addr = record->virt_addr; 
sample.phys_addr = record->phys_addr; sample.data_src = data_src; + sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -347,6 +353,36 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; sample.addr = record->to_ip; + sample.weight = record->latency; + + return arm_spe_deliver_synth_event(spe, speq, event, &sample); +} + +static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, + u64 spe_events_id, u64 data_src) +{ + struct arm_spe *spe = speq->spe; + struct arm_spe_record *record = &speq->decoder->record; + union perf_event *event = speq->event_buf; + struct perf_sample sample = { .ip = 0, }; + + /* + * Handles perf instruction sampling period. + */ + speq->period_instructions++; + if (speq->period_instructions < spe->instructions_sample_period) + return 0; + speq->period_instructions = 0; + + arm_spe_prep_sample(spe, speq, event, &sample); + + sample.id = spe_events_id; + sample.stream_id = spe_events_id; + sample.addr = record->virt_addr; + sample.phys_addr = record->phys_addr; + sample.data_src = data_src; + sample.period = spe->instructions_sample_period; + sample.weight = record->latency; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -480,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } + if (spe->sample_instructions) { + err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src); + if (err) + return err; + } + return 0; } @@ -993,7 +1035,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.type = PERF_TYPE_HARDWARE; attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | - PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC; + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC | + PERF_SAMPLE_WEIGHT; if (spe->timeless_decoding) attr.sample_type &= 
~(u64)PERF_SAMPLE_TIME; else @@ -1107,7 +1150,29 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) return err; spe->memory_id = id; arm_spe_set_event_name(evlist, id, "memory"); + id += 1; + } + + if (spe->synth_opts.instructions) { + if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) { + pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n"); + goto synth_instructions_out; + } + if (spe->synth_opts.period > 1) + pr_warning("Arm SPE has a hardware-based sample period.\n" + "Additional instruction events will be discarded by --itrace\n"); + + spe->sample_instructions = true; + attr.config = PERF_COUNT_HW_INSTRUCTIONS; + attr.sample_period = spe->synth_opts.period; + spe->instructions_sample_period = attr.sample_period; + err = arm_spe_synth_event(session, &attr, id); + if (err) + return err; + spe->instructions_id = id; + arm_spe_set_event_name(evlist, id, "instructions"); } +synth_instructions_out: return 0; } diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.c b/tools/perf/util/arm64-frame-pointer-unwind-support.c new file mode 100644 index 000000000000..2242a885fbd7 --- /dev/null +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "arm64-frame-pointer-unwind-support.h" +#include "callchain.h" +#include "event.h" +#include "perf_regs.h" // SMPL_REG_MASK +#include "unwind.h" + +#define perf_event_arm_regs perf_event_arm64_regs +#include "../../arch/arm64/include/uapi/asm/perf_regs.h" +#undef perf_event_arm_regs + +struct entries { + u64 stack[2]; + size_t length; +}; + +static bool get_leaf_frame_caller_enabled(struct perf_sample *sample) +{ + return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs + && sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR); +} + +static int add_entry(struct unwind_entry *entry, void *arg) +{ + struct entries *entries = arg; + + 
entries->stack[entries->length++] = entry->ip; + return 0; +} + +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx) +{ + int ret; + struct entries entries = {}; + struct regs_dump old_regs = sample->user_regs; + + if (!get_leaf_frame_caller_enabled(sample)) + return 0; + + /* + * If PC and SP are not recorded, get the value of PC from the stack + * and set its mask. SP is not used when doing the unwinding but it + * still needs to be set to prevent failures. + */ + + if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) { + sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC); + sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1]; + } + + if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) { + sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP); + sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0; + } + + ret = unwind__get_entries(add_entry, &entries, thread, sample, 2); + sample->user_regs = old_regs; + + if (ret || entries.length != 2) + return ret; + + return callchain_param.order == ORDER_CALLER ? 
entries.stack[0] : entries.stack[1]; +} diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h new file mode 100644 index 000000000000..32af9ce94398 --- /dev/null +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H +#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H + +#include "event.h" +#include "thread.h" + +u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx); + +#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c679394b898d..5632efc44738 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, mm->prev = 0; mm->idx = mp->idx; mm->tid = mp->tid; - mm->cpu = mp->cpu; + mm->cpu = mp->cpu.cpu; if (!mp->len) { mm->base = NULL; @@ -180,7 +180,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, else mp->tid = -1; } else { - mp->cpu = -1; + mp->cpu.cpu = -1; mp->tid = perf_thread_map__pid(evlist->core.threads, idx); } } @@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues, if (!queue->set) { queue->set = true; queue->tid = buffer->tid; - queue->cpu = buffer->cpu; + queue->cpu = buffer->cpu.cpu; } buffer->buffer_nr = queues->next_buffer_nr++; @@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, return 0; } -static bool filter_cpu(struct perf_session *session, int cpu) +static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu) { unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap; - return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap); + return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap); } static int 
auxtrace_queues__add_buffer(struct auxtrace_queues *queues, @@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues, struct auxtrace_buffer buffer = { .pid = -1, .tid = event->auxtrace.tid, - .cpu = event->auxtrace.cpu, + .cpu = { event->auxtrace.cpu }, .data_offset = data_offset, .offset = event->auxtrace.offset, .reference = event->auxtrace.reference, diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index bbf0d78c6401..19910b9011f3 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,6 +15,7 @@ #include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> +#include <internal/cpumap.h> #include <asm/bitsperlong.h> #include <asm/barrier.h> @@ -240,7 +241,7 @@ struct auxtrace_buffer { size_t size; pid_t pid; pid_t tid; - int cpu; + struct perf_cpu cpu; void *data; off_t data_offset; void *mmap_addr; @@ -350,7 +351,7 @@ struct auxtrace_mmap_params { int prot; int idx; pid_t tid; - int cpu; + struct perf_cpu cpu; }; /** diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index fbb3c4057c30..7ecfaac7536a 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -29,6 +29,9 @@ #include <internal/xyarray.h> +/* temporarily disable libbpf deprecation warnings */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)), const char *fmt, va_list args) { @@ -421,7 +424,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n, size_t prologue_cnt = 0; int i, err; - if (IS_ERR(priv) || !priv || priv->is_tp) + if (IS_ERR_OR_NULL(priv) || priv->is_tp) goto errout; pev = &priv->pev; @@ -570,7 +573,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) bool need_prologue = false; int err, i; - if (IS_ERR(priv) || !priv) { + if (IS_ERR_OR_NULL(priv)) { pr_debug("Internal error when hook preprocessor\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -642,8 
+645,11 @@ int bpf__probe(struct bpf_object *obj) goto out; priv = bpf_program__priv(prog); - if (IS_ERR(priv) || !priv) { - err = PTR_ERR(priv); + if (IS_ERR_OR_NULL(priv)) { + if (!priv) + err = -BPF_LOADER_ERRNO__INTERNAL; + else + err = PTR_ERR(priv); goto out; } @@ -693,7 +699,7 @@ int bpf__unprobe(struct bpf_object *obj) struct bpf_prog_priv *priv = bpf_program__priv(prog); int i; - if (IS_ERR(priv) || !priv || priv->is_tp) + if (IS_ERR_OR_NULL(priv) || priv->is_tp) continue; for (i = 0; i < priv->pev.ntevs; i++) { @@ -751,7 +757,7 @@ int bpf__foreach_event(struct bpf_object *obj, struct perf_probe_event *pev; int i, fd; - if (IS_ERR(priv) || !priv) { + if (IS_ERR_OR_NULL(priv)) { pr_debug("bpf: failed to get private field\n"); return -BPF_LOADER_ERRNO__INTERNAL; } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index c17d4a43ce06..3ce8d03cb7ec 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel) return 0; } -static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, +static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bpf_prog_profiler_bpf *skel; @@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, assert(skel != NULL); ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu, &fd, BPF_ANY); + &cpu_map_idx, &fd, BPF_ANY); if (ret) return ret; } @@ -307,6 +307,20 @@ static bool bperf_attr_map_compatible(int attr_map_fd) (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); } +int __weak +bpf_map_create(enum bpf_map_type map_type, + const char *map_name __maybe_unused, + __u32 key_size, + __u32 value_size, + __u32 max_entries, + const struct bpf_map_create_opts *opts __maybe_unused) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + return bpf_create_map(map_type, 
key_size, value_size, max_entries, 0); +#pragma GCC diagnostic pop +} + static int bperf_lock_attr_map(struct target *target) { char path[PATH_MAX]; @@ -320,10 +334,10 @@ static int bperf_lock_attr_map(struct target *target) } if (access(path, F_OK)) { - map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(struct perf_event_attr), sizeof(struct perf_event_attr_map_entry), - ATTR_MAP_SIZE, 0); + ATTR_MAP_SIZE, NULL); if (map_fd < 0) return -1; @@ -540,7 +554,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) filter_type == BPERF_FILTER_TGID) key = evsel->core.threads->map[i].pid; else if (filter_type == BPERF_FILTER_CPU) - key = evsel->core.cpus->map[i]; + key = evsel->core.cpus->map[i].cpu; else break; @@ -566,12 +580,12 @@ out: return err; } -static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) +static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { struct bperf_leader_bpf *skel = evsel->leader_skel; return bpf_map_update_elem(bpf_map__fd(skel->maps.events), - &cpu, &fd, BPF_ANY); + &cpu_map_idx, &fd, BPF_ANY); } /* @@ -584,7 +598,7 @@ static int bperf_sync_counters(struct evsel *evsel) num_cpu = all_cpu_map->nr; for (i = 0; i < num_cpu; i++) { - cpu = all_cpu_map->map[i]; + cpu = all_cpu_map->map[i].cpu; bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); } return 0; @@ -605,15 +619,17 @@ static int bperf__disable(struct evsel *evsel) static int bperf__read(struct evsel *evsel) { struct bperf_follower_bpf *skel = evsel->follower_skel; - __u32 num_cpu_bpf = cpu__max_cpu(); + __u32 num_cpu_bpf = cpu__max_cpu().cpu; struct bpf_perf_event_value values[num_cpu_bpf]; int reading_map_fd, err = 0; - __u32 i, j, num_cpu; + __u32 i; + int j; bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + struct perf_cpu entry; __u32 cpu; err = 
bpf_map_lookup_elem(reading_map_fd, &i, values); @@ -623,16 +639,15 @@ static int bperf__read(struct evsel *evsel) case BPERF_FILTER_GLOBAL: assert(i == 0); - num_cpu = all_cpu_map->nr; - for (j = 0; j < num_cpu; j++) { - cpu = all_cpu_map->map[j]; + perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) { + cpu = entry.cpu; perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; } break; case BPERF_FILTER_CPU: - cpu = evsel->core.cpus->map[i]; + cpu = evsel->core.cpus->map[i].cpu; perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; perf_counts(evsel->counts, i, 0)->run = values[cpu].running; @@ -757,11 +772,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel) evsel->follower_skel == NULL; } -int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) +int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd) { if (bpf_counter_skip(evsel)) return 0; - return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); + return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 65ebaa6694fb..4dbf26408b69 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel); typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel, struct target *target); typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, - int cpu, + int cpu_map_idx, int fd); struct bpf_counter_ops { @@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel); int bpf_counter__disable(struct evsel *evsel); int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); -int 
bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); +int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd); #else /* HAVE_BPF_SKEL */ diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index cbc6c2bca488..631e34a0b66f 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -48,7 +48,7 @@ static int bperf_load_program(struct evlist *evlist) struct cgroup *cgrp, *leader_cgrp; __u32 i, cpu; __u32 nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu(); + int total_cpus = cpu__max_cpu().cpu; int map_size, map_fd; int prog_fd, err; @@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist) for (cpu = 0; cpu < nr_cpus; cpu++) { int fd = FD(evsel, cpu); __u32 idx = evsel->core.idx * total_cpus + - evlist->core.all_cpus->map[cpu]; + evlist->core.all_cpus->map[cpu].cpu; err = bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); @@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist) int prog_fd = bpf_program__fd(skel->progs.trigger_read); for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i]; + cpu = evlist->core.all_cpus->map[i].cpu; bperf_trigger_reading(prog_fd, cpu); } @@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel) { struct evlist *evlist = evsel->evlist; int i, cpu, nr_cpus = evlist->core.all_cpus->nr; - int total_cpus = cpu__max_cpu(); + int total_cpus = cpu__max_cpu().cpu; struct perf_counts_values *counts; struct bpf_perf_event_value *values; int reading_map_fd, err = 0; @@ -272,7 +272,7 @@ static int bperf_cgrp__read(struct evsel *evsel) } for (i = 0; i < nr_cpus; i++) { - cpu = evlist->core.all_cpus->map[i]; + cpu = evlist->core.all_cpus->map[i].cpu; counts = perf_counts(evsel->counts, i, 0); counts->val = values[cpu].counter; diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c new file mode 100644 index 000000000000..d756cc66eef3 --- /dev/null +++ 
b/tools/perf/util/bpf_ftrace.c @@ -0,0 +1,152 @@ +#include <stdio.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdlib.h> + +#include <linux/err.h> + +#include "util/ftrace.h" +#include "util/cpumap.h" +#include "util/thread_map.h" +#include "util/debug.h" +#include "util/evlist.h" +#include "util/bpf_counter.h" + +#include "util/bpf_skel/func_latency.skel.h" + +static struct func_latency_bpf *skel; + +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) +{ + int fd, err; + int i, ncpus = 1, ntasks = 1; + struct filter_entry *func; + + if (!list_is_singular(&ftrace->filters)) { + pr_err("ERROR: %s target function(s).\n", + list_empty(&ftrace->filters) ? "No" : "Too many"); + return -1; + } + + func = list_first_entry(&ftrace->filters, struct filter_entry, list); + + skel = func_latency_bpf__open(); + if (!skel) { + pr_err("Failed to open func latency skeleton\n"); + return -1; + } + + /* don't need to set cpu filter for system-wide mode */ + if (ftrace->target.cpu_list) { + ncpus = perf_cpu_map__nr(ftrace->evlist->core.cpus); + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + } + + if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { + ntasks = perf_thread_map__nr(ftrace->evlist->core.threads); + bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + } + + set_max_rlimit(); + + err = func_latency_bpf__load(skel); + if (err) { + pr_err("Failed to load func latency skeleton\n"); + goto out; + } + + if (ftrace->target.cpu_list) { + u32 cpu; + u8 val = 1; + + skel->bss->has_cpu = 1; + fd = bpf_map__fd(skel->maps.cpu_filter); + + for (i = 0; i < ncpus; i++) { + cpu = perf_cpu_map__cpu(ftrace->evlist->core.cpus, i).cpu; + bpf_map_update_elem(fd, &cpu, &val, BPF_ANY); + } + } + + if (target__has_task(&ftrace->target) || target__none(&ftrace->target)) { + u32 pid; + u8 val = 1; + + skel->bss->has_task = 1; + fd = bpf_map__fd(skel->maps.task_filter); + + for (i = 0; i < ntasks; i++) { + pid = 
perf_thread_map__pid(ftrace->evlist->core.threads, i); + bpf_map_update_elem(fd, &pid, &val, BPF_ANY); + } + } + + skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin, + false, func->name); + if (IS_ERR(skel->links.func_begin)) { + pr_err("Failed to attach fentry program\n"); + err = PTR_ERR(skel->links.func_begin); + goto out; + } + + skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end, + true, func->name); + if (IS_ERR(skel->links.func_end)) { + pr_err("Failed to attach fexit program\n"); + err = PTR_ERR(skel->links.func_end); + goto out; + } + + /* XXX: we don't actually use this fd - just for poll() */ + return open("/dev/null", O_RDONLY); + +out: + return err; +} + +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + skel->bss->enabled = 1; + return 0; +} + +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + skel->bss->enabled = 0; + return 0; +} + +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, + int buckets[]) +{ + int i, fd, err; + u32 idx; + u64 *hist; + int ncpus = cpu__max_cpu().cpu; + + fd = bpf_map__fd(skel->maps.latency); + + hist = calloc(ncpus, sizeof(*hist)); + if (hist == NULL) + return -ENOMEM; + + for (idx = 0; idx < NUM_BUCKET; idx++) { + err = bpf_map_lookup_elem(fd, &idx, hist); + if (err) { + buckets[idx] = 0; + continue; + } + + for (i = 0; i < ncpus; i++) + buckets[idx] += hist[i]; + } + + free(hist); + return 0; +} + +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + func_latency_bpf__destroy(skel); + return 0; +} diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c new file mode 100644 index 000000000000..ea94187fe443 --- /dev/null +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Google +#include "vmlinux.h" +#include 
<bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +// This should be in sync with "util/ftrace.h" +#define NUM_BUCKET 22 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 10000); +} functime SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} cpu_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u8)); + __uint(max_entries, 1); +} task_filter SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, NUM_BUCKET); +} latency SEC(".maps"); + + +int enabled = 0; +int has_cpu = 0; +int has_task = 0; + +SEC("kprobe/func") +int BPF_PROG(func_begin) +{ + __u64 key, now; + + if (!enabled) + return 0; + + key = bpf_get_current_pid_tgid(); + + if (has_cpu) { + __u32 cpu = bpf_get_smp_processor_id(); + __u8 *ok; + + ok = bpf_map_lookup_elem(&cpu_filter, &cpu); + if (!ok) + return 0; + } + + if (has_task) { + __u32 pid = key & 0xffffffff; + __u8 *ok; + + ok = bpf_map_lookup_elem(&task_filter, &pid); + if (!ok) + return 0; + } + + now = bpf_ktime_get_ns(); + + // overwrite timestamp for nested functions + bpf_map_update_elem(&functime, &key, &now, BPF_ANY); + return 0; +} + +SEC("kretprobe/func") +int BPF_PROG(func_end) +{ + __u64 tid; + __u64 *start; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + + start = bpf_map_lookup_elem(&functime, &tid); + if (start) { + __s64 delta = bpf_ktime_get_ns() - *start; + __u32 key; + __u64 *hist; + + bpf_map_delete_elem(&functime, &tid); + + if (delta < 0) + return 0; + + // calculate index using delta in usec + for (key = 0; key < (NUM_BUCKET - 1); key++) { + if (delta < ((1000UL) << key)) + break; + } + + hist = 
bpf_map_lookup_elem(&latency, &key); + if (!hist) + return 0; + + *hist += 1; + } + + return 0; +} diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 8e2777133bd9..131207b91d15 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1600,7 +1600,7 @@ void callchain_cursor_reset(struct callchain_cursor *cursor) map__zput(node->ms.map); } -void callchain_param_setup(u64 sample_type) +void callchain_param_setup(u64 sample_type, const char *arch) { if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && @@ -1612,6 +1612,18 @@ void callchain_param_setup(u64 sample_type) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* + * It's necessary to use libunwind to reliably determine the caller of + * a leaf function on aarch64, as otherwise we cannot know whether to + * start from the LR or FP. + * + * Always starting from the LR can result in duplicate or entirely + * erroneous entries. Always skipping the LR and starting from the FP + * can result in missing entries. 
+ */ + if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64")) + dwarf_callchain_users = true; } static bool chain_match(struct callchain_list *base_chain, diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 5824134f983b..d95615daed73 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, } #endif +void arch__add_leaf_frame_record_opts(struct record_opts *opts); + char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); char *callchain_node__scnprintf_value(struct callchain_node *node, @@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root, u64 *branch_count, u64 *predicted_count, u64 *abort_count, u64 *cycles_count); -void callchain_param_setup(u64 sample_type); +void callchain_param_setup(u64 sample_type, const char *arch); bool callchain_cnode_matched(struct callchain_node *base_cnode, struct callchain_node *pair_cnode); diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 582f3aeaf5e4..2b81707b9dba 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -4,6 +4,7 @@ #include <string.h> #include "evsel.h" #include "counts.h" +#include <perf/threadmap.h> #include <linux/zalloc.h> struct perf_counts *perf_counts__new(int ncpus, int nthreads) @@ -55,9 +56,12 @@ void evsel__reset_counts(struct evsel *evsel) perf_counts__reset(evsel->counts); } -int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads) +int evsel__alloc_counts(struct evsel *evsel) { - evsel->counts = perf_counts__new(ncpus, nthreads); + struct perf_cpu_map *cpus = evsel__cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); + + evsel->counts = perf_counts__new(cpus ? cpus->nr : 1, nthreads); return evsel->counts != NULL ? 
0 : -ENOMEM; } diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 7ff36bf6d644..5de275194f2b 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ -18,21 +18,21 @@ struct perf_counts { static inline struct perf_counts_values* -perf_counts(struct perf_counts *counts, int cpu, int thread) +perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread) { - return xyarray__entry(counts->values, cpu, thread); + return xyarray__entry(counts->values, cpu_map_idx, thread); } static inline bool -perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread) +perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread) { - return *((bool *) xyarray__entry(counts->loaded, cpu, thread)); + return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)); } static inline void -perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded) +perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded) { - *((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded; + *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded; } struct perf_counts *perf_counts__new(int ncpus, int nthreads); @@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts); void perf_counts__reset(struct perf_counts *counts); void evsel__reset_counts(struct evsel *evsel); -int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads); +int evsel__alloc_counts(struct evsel *evsel); void evsel__free_counts(struct evsel *evsel); #endif /* __PERF_COUNTS_H */ diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 87d3eca9b872..12b2243222b0 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -13,9 +13,13 @@ #include <linux/ctype.h> #include <linux/zalloc.h> -static int max_cpu_num; -static int max_present_cpu_num; +static struct perf_cpu max_cpu_num; +static struct perf_cpu max_present_cpu_num; static int 
max_node_num; +/** + * The numa node X as read from /sys/devices/system/node/nodeX indexed by the + * CPU number. + */ static int *cpunode_map; static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) @@ -33,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) * otherwise it would become 65535. */ if (cpus->cpu[i] == (u16) -1) - map->map[i] = -1; + map->map[i].cpu = -1; else - map->map[i] = (int) cpus->cpu[i]; + map->map[i].cpu = (int) cpus->cpu[i]; } } @@ -54,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map int cpu, i = 0; for_each_set_bit(cpu, mask->mask, nbits) - map->map[i++] = cpu; + map->map[i++].cpu = cpu; } return map; @@ -87,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = -1; + cpus->map[i].cpu = -1; refcount_set(&cpus->refcnt, 1); } @@ -104,7 +108,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) cpus->nr = nr; for (i = 0; i < nr; i++) - cpus->map[i] = cpu_map__empty_aggr_cpu_id(); + cpus->map[i] = aggr_cpu_id__empty(); refcount_set(&cpus->refcnt, 1); } @@ -122,28 +126,21 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value) return sysfs__read_int(path, value); } -int cpu_map__get_socket_id(int cpu) +int cpu__get_socket_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value); return ret ?: value; } -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, - void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id = aggr_cpu_id__empty(); - if (idx > map->nr) - return id; - - cpu = map->map[idx]; - - id.socket = cpu_map__get_socket_id(cpu); + id.socket = 
cpu__get_socket_id(cpu); return id; } -static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) +static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) { struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; @@ -160,57 +157,64 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer) return a->thread - b->thread; } -int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), - void *data) +struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, + aggr_cpu_id_get_t get_id, + void *data) { - int nr = cpus->nr; - struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr); - int cpu, s2; - struct aggr_cpu_id s1; + int idx; + struct perf_cpu cpu; + struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr); if (!c) - return -1; + return NULL; /* Reset size as it may only be partially filled */ c->nr = 0; - for (cpu = 0; cpu < nr; cpu++) { - s1 = f(cpus, cpu, data); - for (s2 = 0; s2 < c->nr; s2++) { - if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2])) + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + bool duplicate = false; + struct aggr_cpu_id cpu_id = get_id(cpu, data); + + for (int j = 0; j < c->nr; j++) { + if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) { + duplicate = true; break; + } } - if (s2 == c->nr) { - c->map[c->nr] = s1; + if (!duplicate) { + c->map[c->nr] = cpu_id; c->nr++; } } + /* Trim. 
*/ + if (c->nr != cpus->nr) { + struct cpu_aggr_map *trimmed_c = + realloc(c, + sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr); + + if (trimmed_c) + c = trimmed_c; + } /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id); + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); + + return c; - *res = c; - return 0; } -int cpu_map__get_die_id(int cpu) +int cpu__get_die_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "die_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value); return ret ?: value; } -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) { - int cpu, die; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id; + int die; - if (idx > map->nr) - return id; - - cpu = map->map[idx]; - - die = cpu_map__get_die_id(cpu); + die = cpu__get_die_id(cpu); /* There is no die_id on legacy system. */ if (die == -1) die = 0; @@ -220,79 +224,59 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat * with the socket ID and then add die to * make a unique ID. 
*/ - id = cpu_map__get_socket(map, idx, data); - if (cpu_map__aggr_cpu_id_is_empty(id)) + id = aggr_cpu_id__socket(cpu, data); + if (aggr_cpu_id__is_empty(&id)) return id; id.die = die; return id; } -int cpu_map__get_core_id(int cpu) +int cpu__get_core_id(struct perf_cpu cpu) { - int value, ret = cpu__get_topology_int(cpu, "core_id", &value); + int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value); return ret ?: value; } -int cpu_map__get_node_id(int cpu) -{ - return cpu__get_node(cpu); -} - -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data) +struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data) { - int cpu; - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); - - if (idx > map->nr) - return id; + struct aggr_cpu_id id; + int core = cpu__get_core_id(cpu); - cpu = map->map[idx]; - - cpu = cpu_map__get_core_id(cpu); - - /* cpu_map__get_die returns a struct with socket and die set*/ - id = cpu_map__get_die(map, idx, data); - if (cpu_map__aggr_cpu_id_is_empty(id)) + /* aggr_cpu_id__die returns a struct with socket and die set. */ + id = aggr_cpu_id__die(cpu, data); + if (aggr_cpu_id__is_empty(&id)) return id; /* * core_id is relative to socket and die, we need a global id. * So we combine the result from cpu_map__get_die with the core id */ - id.core = cpu; + id.core = core; return id; + } -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused) +struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data) { - struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id(); + struct aggr_cpu_id id; - if (idx < 0 || idx >= map->nr) + /* aggr_cpu_id__core returns a struct with socket, die and core set. 
*/ + id = aggr_cpu_id__core(cpu, data); + if (aggr_cpu_id__is_empty(&id)) return id; - id.node = cpu_map__get_node_id(map->map[idx]); + id.cpu = cpu; return id; -} -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp) -{ - return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); } -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep) +struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused) { - return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL); -} - -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep) -{ - return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); -} + struct aggr_cpu_id id = aggr_cpu_id__empty(); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap) -{ - return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL); + id.node = cpu__get_node(cpu); + return id; } /* setup simple routines to easily access node numbers given a cpu number */ @@ -335,8 +319,8 @@ static void set_max_cpu_num(void) int ret = -1; /* set up default */ - max_cpu_num = 4096; - max_present_cpu_num = 4096; + max_cpu_num.cpu = 4096; + max_present_cpu_num.cpu = 4096; mnt = sysfs__mountpoint(); if (!mnt) @@ -349,7 +333,7 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_cpu_num); + ret = get_max_num(path, &max_cpu_num.cpu); if (ret) goto out; @@ -360,11 +344,11 @@ static void set_max_cpu_num(void) goto out; } - ret = get_max_num(path, &max_present_cpu_num); + ret = get_max_num(path, &max_present_cpu_num.cpu); out: if (ret) - pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num); + pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); } /* Determine highest possible node in the system for sparse allocation */ @@ -403,31 +387,31 @@ int cpu__max_node(void) return max_node_num; } -int cpu__max_cpu(void) +struct perf_cpu cpu__max_cpu(void) 
{ - if (unlikely(!max_cpu_num)) + if (unlikely(!max_cpu_num.cpu)) set_max_cpu_num(); return max_cpu_num; } -int cpu__max_present_cpu(void) +struct perf_cpu cpu__max_present_cpu(void) { - if (unlikely(!max_present_cpu_num)) + if (unlikely(!max_present_cpu_num.cpu)) set_max_cpu_num(); return max_present_cpu_num; } -int cpu__get_node(int cpu) +int cpu__get_node(struct perf_cpu cpu) { if (unlikely(cpunode_map == NULL)) { pr_debug("cpu_map not initialized\n"); return -1; } - return cpunode_map[cpu]; + return cpunode_map[cpu.cpu]; } static int init_cpunode_map(void) @@ -437,13 +421,13 @@ static int init_cpunode_map(void) set_max_cpu_num(); set_max_node_num(); - cpunode_map = calloc(max_cpu_num, sizeof(int)); + cpunode_map = calloc(max_cpu_num.cpu, sizeof(int)); if (!cpunode_map) { pr_err("%s: calloc failed\n", __func__); return -1; } - for (i = 0; i < max_cpu_num; i++) + for (i = 0; i < max_cpu_num.cpu; i++) cpunode_map[i] = -1; return 0; @@ -502,47 +486,39 @@ int cpu__setup_cpunode_map(void) return 0; } -bool cpu_map__has(struct perf_cpu_map *cpus, int cpu) -{ - return perf_cpu_map__idx(cpus, cpu) != -1; -} - -int cpu_map__cpu(struct perf_cpu_map *cpus, int idx) -{ - return cpus->map[idx]; -} - size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) { - int i, cpu, start = -1; + int i, start = -1; bool first = true; size_t ret = 0; #define COMMA first ? "" : "," for (i = 0; i < map->nr + 1; i++) { + struct perf_cpu cpu = { .cpu = INT_MAX }; bool last = i == map->nr; - cpu = last ? 
INT_MAX : map->map[i]; + if (!last) + cpu = map->map[i]; if (start == -1) { start = i; if (last) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[i]); + map->map[i].cpu); } - } else if (((i - start) != (cpu - map->map[start])) || last) { + } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) { int end = i - 1; if (start == end) { ret += snprintf(buf + ret, size - ret, "%s%d", COMMA, - map->map[start]); + map->map[start].cpu); } else { ret += snprintf(buf + ret, size - ret, "%s%d-%d", COMMA, - map->map[start], map->map[end]); + map->map[start].cpu, map->map[end].cpu); } first = false; start = i; @@ -569,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) int i, cpu; char *ptr = buf; unsigned char *bitmap; - int last_cpu = cpu_map__cpu(map, map->nr - 1); + struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1); if (buf == NULL) return 0; - bitmap = zalloc(last_cpu / 8 + 1); + bitmap = zalloc(last_cpu.cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; } for (i = 0; i < map->nr; i++) { - cpu = cpu_map__cpu(map, i); + cpu = perf_cpu_map__cpu(map, i).cpu; bitmap[cpu / 8] |= 1 << (cpu % 8); } - for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) { + for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { unsigned char bits = bitmap[cpu / 8]; if (cpu % 8) @@ -614,32 +590,35 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ return online; } -bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b) +bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) { - return a.thread == b.thread && - a.node == b.node && - a.socket == b.socket && - a.die == b.die && - a.core == b.core; + return a->thread == b->thread && + a->node == b->node && + a->socket == b->socket && + a->die == b->die && + a->core == b->core && + a->cpu.cpu == b->cpu.cpu; } -bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a) +bool 
aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) { - return a.thread == -1 && - a.node == -1 && - a.socket == -1 && - a.die == -1 && - a.core == -1; + return a->thread == -1 && + a->node == -1 && + a->socket == -1 && + a->die == -1 && + a->core == -1 && + a->cpu.cpu == -1; } -struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void) +struct aggr_cpu_id aggr_cpu_id__empty(void) { struct aggr_cpu_id ret = { .thread = -1, .node = -1, .socket = -1, .die = -1, - .core = -1 + .core = -1, + .cpu = (struct perf_cpu){ .cpu = -1 }, }; return ret; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index a27eeaf086e8..0d3c2006a15d 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -2,71 +2,135 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H +#include <stdbool.h> #include <stdio.h> #include <stdbool.h> #include <internal/cpumap.h> #include <perf/cpumap.h> +/** Identify where counts are aggregated, -1 implies not to aggregate. */ struct aggr_cpu_id { + /** A value in the range 0 to number of threads. */ int thread; + /** The numa node X as read from /sys/devices/system/node/nodeX. */ int node; + /** + * The socket number as read from + * /sys/devices/system/cpu/cpuX/topology/physical_package_id. + */ int socket; + /** The die id as read from /sys/devices/system/cpu/cpuX/topology/die_id. */ int die; + /** The core id as read from /sys/devices/system/cpu/cpuX/topology/core_id. */ int core; + /** CPU aggregation, note there is one CPU for each SMT thread. */ + struct perf_cpu cpu; }; +/** A collection of aggr_cpu_id values, the "built" version is sorted and uniqued. */ struct cpu_aggr_map { refcount_t refcnt; + /** Number of valid entries. */ int nr; + /** The entries. 
*/ struct aggr_cpu_id map[]; }; struct perf_record_cpu_map_data; struct perf_cpu_map *perf_cpu_map__empty_new(int nr); -struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); struct perf_cpu_map *cpu_map__new_data(struct perf_record_cpu_map_data *data); size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size); size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp); -int cpu_map__get_socket_id(int cpu); -struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__get_die_id(int cpu); -struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__get_core_id(int cpu); -struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__get_node_id(int cpu); -struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data); -int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp); -int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep); -int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep); -int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **nodep); const struct perf_cpu_map *cpu_map__online(void); /* thread unsafe */ -static inline int cpu_map__socket(struct perf_cpu_map *sock, int s) +int cpu__setup_cpunode_map(void); + +int cpu__max_node(void); +struct perf_cpu cpu__max_cpu(void); +struct perf_cpu cpu__max_present_cpu(void); + +/** + * cpu_map__is_dummy - Events associated with a pid, rather than a CPU, use a single dummy map with an entry of -1. 
+ */ +static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus) { - if (!sock || s > sock->nr || s < 0) - return 0; - return sock->map[s]; + return cpus->nr == 1 && cpus->map[0].cpu == -1; } -int cpu__setup_cpunode_map(void); +/** + * cpu__get_node - Returns the numa node X as read from + * /sys/devices/system/node/nodeX for the given CPU. + */ +int cpu__get_node(struct perf_cpu cpu); +/** + * cpu__get_socket_id - Returns the socket number as read from + * /sys/devices/system/cpu/cpuX/topology/physical_package_id for the given CPU. + */ +int cpu__get_socket_id(struct perf_cpu cpu); +/** + * cpu__get_die_id - Returns the die id as read from + * /sys/devices/system/cpu/cpuX/topology/die_id for the given CPU. + */ +int cpu__get_die_id(struct perf_cpu cpu); +/** + * cpu__get_core_id - Returns the core id as read from + * /sys/devices/system/cpu/cpuX/topology/core_id for the given CPU. + */ +int cpu__get_core_id(struct perf_cpu cpu); -int cpu__max_node(void); -int cpu__max_cpu(void); -int cpu__max_present_cpu(void); -int cpu__get_node(int cpu); +/** + * cpu_aggr_map__empty_new - Create a cpu_aggr_map of size nr with every entry + * being empty. + */ +struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr); + +typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data); + +/** + * cpu_aggr_map__new - Create a cpu_aggr_map with an aggr_cpu_id for each cpu in + * cpus. The aggr_cpu_id is created with 'get_id' that may have a data value + * passed to it. The cpu_aggr_map is sorted with duplicate values removed. 
+ */ +struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, + aggr_cpu_id_get_t get_id, + void *data); -int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res, - struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data), - void *data); +bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); +bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); +struct aggr_cpu_id aggr_cpu_id__empty(void); -int cpu_map__cpu(struct perf_cpu_map *cpus, int idx); -bool cpu_map__has(struct perf_cpu_map *cpus, int cpu); -bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b); -bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a); -struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void); +/** + * aggr_cpu_id__socket - Create an aggr_cpu_id with the socket populated with + * the socket for cpu. The function signature is compatible with + * aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__die - Create an aggr_cpu_id with the die and socket populated + * with the die and socket for cpu. The function signature is compatible with + * aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__core - Create an aggr_cpu_id with the core, die and socket + * populated with the core, die and socket for cpu. The function signature is + * compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__cpu - Create an aggr_cpu_id with the cpu, core, die and socket + * populated with the cpu, core, die and socket for cpu. The function signature + * is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); +/** + * aggr_cpu_id__node - Create an aggr_cpu_id with the numa node populated for + * cpu. 
The function signature is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 51b429c86f98..e20b835a1194 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -165,7 +165,8 @@ static bool has_die_topology(void) if (uname(&uts) < 0) return false; - if (strncmp(uts.machine, "x86_64", 6)) + if (strncmp(uts.machine, "x86_64", 6) && + strncmp(uts.machine, "s390x", 5)) return false; scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT, @@ -187,7 +188,7 @@ struct cpu_topology *cpu_topology__new(void) struct perf_cpu_map *map; bool has_die = has_die_topology(); - ncpus = cpu__max_present_cpu(); + ncpus = cpu__max_present_cpu().cpu; /* build online CPU map */ map = perf_cpu_map__new(NULL); @@ -218,7 +219,7 @@ struct cpu_topology *cpu_topology__new(void) tp->core_cpus_list = addr; for (i = 0; i < nr; i++) { - if (!cpu_map__has(map, i)) + if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i })) continue; ret = build_cpu_topology(tp, i); @@ -333,7 +334,7 @@ struct numa_topology *numa_topology__new(void) tp->nr = nr; for (i = 0; i < nr; i++) { - if (load_numa_node(&tp->nodes[i], node_map->map[i])) { + if (load_numa_node(&tp->nodes[i], node_map->map[i].cpu)) { numa_topology__delete(tp); tp = NULL; break; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8f7705bbc2da..9e0aee276df8 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, offset = tmp_val; len = offset >> 16; offset &= 0xffff; + if (flags & TEP_FIELD_IS_RELATIVE) + offset += fmtf->offset + fmtf->size; } if (flags & TEP_FIELD_IS_ARRAY) { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2c06abf6dcd2..65e6c22f38e4 100644 --- a/tools/perf/util/debug.c +++ 
b/tools/perf/util/debug.c @@ -179,7 +179,7 @@ static int trace_event_printer(enum binary_printer_ops op, break; case BINARY_PRINT_CHAR_DATA: printed += color_fprintf(fp, color, "%c", - isprint(ch) ? ch : '.'); + isprint(ch) && isascii(ch) ? ch : '.'); break; case BINARY_PRINT_CHAR_PAD: printed += color_fprintf(fp, color, " "); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index b9904896eb97..579e44c59914 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -285,13 +285,13 @@ out_enomem: int perf_env__read_cpu_topology_map(struct perf_env *env) { - int cpu, nr_cpus; + int idx, nr_cpus; if (env->cpu != NULL) return 0; if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu(); + env->nr_cpus_avail = cpu__max_present_cpu().cpu; nr_cpus = env->nr_cpus_avail; if (nr_cpus == -1) @@ -301,10 +301,12 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) if (env->cpu == NULL) return -ENOMEM; - for (cpu = 0; cpu < nr_cpus; ++cpu) { - env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); - env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); - env->cpu[cpu].die_id = cpu_map__get_die_id(cpu); + for (idx = 0; idx < nr_cpus; ++idx) { + struct perf_cpu cpu = { .cpu = idx }; + + env->cpu[idx].core_id = cpu__get_core_id(cpu); + env->cpu[idx].socket_id = cpu__get_socket_id(cpu); + env->cpu[idx].die_id = cpu__get_die_id(cpu); } env->nr_cpus_avail = nr_cpus; @@ -381,7 +383,7 @@ static int perf_env__read_arch(struct perf_env *env) static int perf_env__read_nr_cpus_avail(struct perf_env *env) { if (env->nr_cpus_avail == 0) - env->nr_cpus_avail = cpu__max_present_cpu(); + env->nr_cpus_avail = cpu__max_present_cpu().cpu; return env->nr_cpus_avail ? 
0 : -ENOENT; } @@ -487,7 +489,7 @@ const char *perf_env__pmu_mappings(struct perf_env *env) return env->pmu_mappings; } -int perf_env__numa_node(struct perf_env *env, int cpu) +int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) { if (!env->nr_numa_map) { struct numa_node *nn; @@ -495,7 +497,7 @@ int perf_env__numa_node(struct perf_env *env, int cpu) for (i = 0; i < env->nr_numa_nodes; i++) { nn = &env->numa_nodes[i]; - nr = max(nr, perf_cpu_map__max(nn->map)); + nr = max(nr, perf_cpu_map__max(nn->map).cpu); } nr++; @@ -514,13 +516,14 @@ int perf_env__numa_node(struct perf_env *env, int cpu) env->nr_numa_map = nr; for (i = 0; i < env->nr_numa_nodes; i++) { - int tmp, j; + struct perf_cpu tmp; + int j; nn = &env->numa_nodes[i]; - perf_cpu_map__for_each_cpu(j, tmp, nn->map) - env->numa_map[j] = i; + perf_cpu_map__for_each_cpu(tmp, j, nn->map) + env->numa_map[tmp.cpu] = i; } } - return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1; + return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? 
env->numa_map[cpu.cpu] : -1; } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 163e5ec503a2..a3541f98e1fc 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/rbtree.h> +#include "cpumap.h" #include "rwsem.h" struct perf_cpu_map; @@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); -int perf_env__numa_node(struct perf_env *env, int cpu); +int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5f92319ce258..6e88d404b5b3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -342,36 +342,65 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel) return perf_thread_map__nr(evlist->core.threads); } -void evlist__cpu_iter_start(struct evlist *evlist) -{ - struct evsel *pos; - - /* - * Reset the per evsel cpu_iter. This is needed because - * each evsel's cpumap may have a different index space, - * and some operations need the index to modify - * the FD xyarray (e.g. 
open, close) - */ - evlist__for_each_entry(evlist, pos) - pos->cpu_iter = 0; -} +struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity) +{ + struct evlist_cpu_iterator itr = { + .container = evlist, + .evsel = evlist__first(evlist), + .cpu_map_idx = 0, + .evlist_cpu_map_idx = 0, + .evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus), + .cpu = (struct perf_cpu){ .cpu = -1}, + .affinity = affinity, + }; -bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu) -{ - if (ev->cpu_iter >= ev->core.cpus->nr) - return true; - if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu) - return true; - return false; + if (itr.affinity) { + itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0); + affinity__set(itr.affinity, itr.cpu.cpu); + itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu); + /* + * If this CPU isn't in the evsel's cpu map then advance through + * the list. + */ + if (itr.cpu_map_idx == -1) + evlist_cpu_iterator__next(&itr); + } + return itr; +} + +void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr) +{ + while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) { + evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel); + evlist_cpu_itr->cpu_map_idx = + perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, + evlist_cpu_itr->cpu); + if (evlist_cpu_itr->cpu_map_idx != -1) + return; + } + evlist_cpu_itr->evlist_cpu_map_idx++; + if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) { + evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container); + evlist_cpu_itr->cpu = + perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus, + evlist_cpu_itr->evlist_cpu_map_idx); + if (evlist_cpu_itr->affinity) + affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu); + evlist_cpu_itr->cpu_map_idx = + perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus, + evlist_cpu_itr->cpu); + /* + * If this CPU isn't in the evsel's cpu map then 
advance through + * the list. + */ + if (evlist_cpu_itr->cpu_map_idx == -1) + evlist_cpu_iterator__next(evlist_cpu_itr); + } } -bool evsel__cpu_iter_skip(struct evsel *ev, int cpu) +bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr) { - if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) { - ev->cpu_iter++; - return false; - } - return true; + return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr; } static int evsel__strcmp(struct evsel *pos, char *evsel_name) @@ -400,31 +429,26 @@ static int evlist__is_enabled(struct evlist *evlist) static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int cpu, i, imm = 0; bool has_imm = false; if (affinity__setup(&affinity) < 0) return; /* Disable 'immediate' events last */ - for (imm = 0; imm <= 1; imm++) { - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - - evlist__for_each_entry(evlist, pos) { - if (evsel__strcmp(pos, evsel_name)) - continue; - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) - continue; - if (pos->immediate) - has_imm = true; - if (pos->immediate != imm) - continue; - evsel__disable_cpu(pos, pos->cpu_iter - 1); - } + for (int imm = 0; imm <= 1; imm++) { + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + pos = evlist_cpu_itr.evsel; + if (evsel__strcmp(pos, evsel_name)) + continue; + if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd) + continue; + if (pos->immediate) + has_imm = true; + if (pos->immediate != imm) + continue; + evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } if (!has_imm) break; @@ -462,24 +486,19 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name) static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; + struct evlist_cpu_iterator evlist_cpu_itr; struct 
affinity affinity; - int cpu, i; if (affinity__setup(&affinity) < 0) return; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - - evlist__for_each_entry(evlist, pos) { - if (evsel__strcmp(pos, evsel_name)) - continue; - if (evsel__cpu_iter_skip(pos, cpu)) - continue; - if (!evsel__is_group_leader(pos) || !pos->core.fd) - continue; - evsel__enable_cpu(pos, pos->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + pos = evlist_cpu_itr.evsel; + if (evsel__strcmp(pos, evsel_name)) + continue; + if (!evsel__is_group_leader(pos) || !pos->core.fd) + continue; + evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } affinity__cleanup(&affinity); evlist__for_each_entry(evlist, pos) { @@ -800,7 +819,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx) static int perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp, - int output, int cpu) + int output, struct perf_cpu cpu) { struct mmap *map = container_of(_map, struct mmap, core); struct mmap_params *mp = container_of(_mp, struct mmap_params, core); @@ -1264,14 +1283,14 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel) void evlist__close(struct evlist *evlist) { struct evsel *evsel; + struct evlist_cpu_iterator evlist_cpu_itr; struct affinity affinity; - int cpu, i; /* * With perf record core.cpus is usually NULL. * Use the old method to handle this for now. 
*/ - if (!evlist->core.cpus) { + if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) { evlist__for_each_entry_reverse(evlist, evsel) evsel__close(evsel); return; @@ -1279,15 +1298,12 @@ void evlist__close(struct evlist *evlist) if (affinity__setup(&affinity) < 0) return; - evlist__for_each_cpu(evlist, i, cpu) { - affinity__set(&affinity, cpu); - evlist__for_each_entry_reverse(evlist, evsel) { - if (evsel__cpu_iter_skip(evsel, cpu)) - continue; - perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1); - } + evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core, + evlist_cpu_itr.cpu_map_idx); } + affinity__cleanup(&affinity); evlist__for_each_entry_reverse(evlist, evsel) { perf_evsel__free_fd(&evsel->core); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 97bfb8d0be4f..64cba56fbc74 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -64,6 +64,7 @@ struct evlist { struct evsel *selected; struct events_stats stats; struct perf_env *env; + const char *hybrid_pmu_name; void (*trace_event_sample_raw)(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); @@ -110,6 +111,7 @@ int __evlist__add_default_attrs(struct evlist *evlist, __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) int arch_evlist__add_default_attrs(struct evlist *evlist); +struct evsel *arch_evlist__leader(struct list_head *list); int evlist__add_dummy(struct evlist *evlist); @@ -325,17 +327,53 @@ void evlist__to_front(struct evlist *evlist, struct evsel *move_evsel); #define evlist__for_each_entry_safe(evlist, tmp, evsel) \ __evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel) -#define evlist__for_each_cpu(evlist, index, cpu) \ - evlist__cpu_iter_start(evlist); \ - perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus) +/** Iterator state for evlist__for_each_cpu */ +struct evlist_cpu_iterator { + /** The list being iterated through. 
*/ + struct evlist *container; + /** The current evsel of the iterator. */ + struct evsel *evsel; + /** The CPU map index corresponding to the evsel->core.cpus for the current CPU. */ + int cpu_map_idx; + /** + * The CPU map index corresponding to evlist->core.all_cpus for the + * current CPU. Distinct from cpu_map_idx as the evsel's cpu map may + * contain fewer entries. + */ + int evlist_cpu_map_idx; + /** The number of CPU map entries in evlist->core.all_cpus. */ + int evlist_cpu_map_nr; + /** The current CPU of the iterator. */ + struct perf_cpu cpu; + /** If present, used to set the affinity when switching between CPUs. */ + struct affinity *affinity; +}; + +/** + * evlist__for_each_cpu - without affinity, iterate over the evlist. With + * affinity, iterate over all CPUs and then the evlist + * for each evsel on that CPU. When switching between + * CPUs the affinity is set to the CPU to avoid IPIs + * during syscalls. + * @evlist_cpu_itr: the iterator instance. + * @evlist: evlist instance to iterate. + * @affinity: NULL or used to set the affinity to the current CPU. + */ +#define evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) \ + for ((evlist_cpu_itr) = evlist__cpu_begin(evlist, affinity); \ + !evlist_cpu_iterator__end(&evlist_cpu_itr); \ + evlist_cpu_iterator__next(&evlist_cpu_itr)) + +/** Returns an iterator set to the first CPU/evsel of evlist. */ +struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity); +/** Move to next element in iterator, updating CPU, evsel and the affinity. */ +void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr); +/** Returns true when iterator is at the end of the CPUs and evlist. 
*/ +bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr); struct evsel *evlist__get_tracking_event(struct evlist *evlist); void evlist__set_tracking_event(struct evlist *evlist, struct evsel *tracking_evsel); -void evlist__cpu_iter_start(struct evlist *evlist); -bool evsel__cpu_iter_skip(struct evsel *ev, int cpu); -bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu); - struct evsel *evlist__find_evsel_by_str(struct evlist *evlist, const char *str); struct evsel *evlist__event2evsel(struct evlist *evlist, union perf_event *event); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ac0127be0459..2f6b18af49e5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1372,9 +1372,9 @@ int evsel__append_addr_filter(struct evsel *evsel, const char *filter) } /* Caller has to clear disabled after going through all CPUs. */ -int evsel__enable_cpu(struct evsel *evsel, int cpu) +int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx) { - return perf_evsel__enable_cpu(&evsel->core, cpu); + return perf_evsel__enable_cpu(&evsel->core, cpu_map_idx); } int evsel__enable(struct evsel *evsel) @@ -1387,9 +1387,9 @@ int evsel__enable(struct evsel *evsel) } /* Caller has to set disabled after going through all CPUs. 
*/ -int evsel__disable_cpu(struct evsel *evsel, int cpu) +int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx) { - return perf_evsel__disable_cpu(&evsel->core, cpu); + return perf_evsel__disable_cpu(&evsel->core, cpu_map_idx); } int evsel__disable(struct evsel *evsel) @@ -1455,7 +1455,7 @@ void evsel__delete(struct evsel *evsel) free(evsel); } -void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, +void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -1463,12 +1463,12 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, if (!evsel->prev_raw_counts) return; - if (cpu == -1) { + if (cpu_map_idx == -1) { tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); - *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; } count->val = count->val - tmp.val; @@ -1476,46 +1476,28 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, count->run = count->run - tmp.run; } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled) +static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) { - s8 scaled = 0; + struct perf_counts_values *count = perf_counts(evsel->counts, cpu_map_idx, thread); - if (scale) { - if (count->run == 0) { - scaled = -1; - count->val = 0; - } else if (count->run < count->ena) { - scaled = 1; - count->val = (u64)((double) count->val * count->ena / count->run); - } - } - - if (pscaled) - *pscaled = scaled; -} - -static int evsel__read_one(struct evsel *evsel, int cpu, int thread) -{ - struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread); - - return perf_evsel__read(&evsel->core, cpu, thread, count); + return 
perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); } -static void evsel__set_count(struct evsel *counter, int cpu, int thread, u64 val, u64 ena, u64 run) +static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, + u64 val, u64 ena, u64 run) { struct perf_counts_values *count; - count = perf_counts(counter->counts, cpu, thread); + count = perf_counts(counter->counts, cpu_map_idx, thread); count->val = val; count->ena = ena; count->run = run; - perf_counts__set_loaded(counter->counts, cpu, thread, true); + perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); } -static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, u64 *data) +static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data) { u64 read_format = leader->core.attr.read_format; struct sample_read_value *v; @@ -1534,7 +1516,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, v = (struct sample_read_value *) data; - evsel__set_count(leader, cpu, thread, v[0].value, ena, run); + evsel__set_count(leader, cpu_map_idx, thread, v[0].value, ena, run); for (i = 1; i < nr; i++) { struct evsel *counter; @@ -1543,13 +1525,13 @@ static int evsel__process_group_data(struct evsel *leader, int cpu, int thread, if (!counter) return -EINVAL; - evsel__set_count(counter, cpu, thread, v[i].value, ena, run); + evsel__set_count(counter, cpu_map_idx, thread, v[i].value, ena, run); } return 0; } -static int evsel__read_group(struct evsel *leader, int cpu, int thread) +static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) { struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->core.attr.read_format; @@ -1570,67 +1552,67 @@ static int evsel__read_group(struct evsel *leader, int cpu, int thread) ps->group_data = data; } - if (FD(leader, cpu, thread) < 0) + if (FD(leader, cpu_map_idx, thread) < 0) return -EINVAL; - if (readn(FD(leader, cpu, thread), 
data, size) <= 0) + if (readn(FD(leader, cpu_map_idx, thread), data, size) <= 0) return -errno; - return evsel__process_group_data(leader, cpu, thread, data); + return evsel__process_group_data(leader, cpu_map_idx, thread, data); } -int evsel__read_counter(struct evsel *evsel, int cpu, int thread) +int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { u64 read_format = evsel->core.attr.read_format; if (read_format & PERF_FORMAT_GROUP) - return evsel__read_group(evsel, cpu, thread); + return evsel__read_group(evsel, cpu_map_idx, thread); - return evsel__read_one(evsel, cpu, thread); + return evsel__read_one(evsel, cpu_map_idx, thread); } -int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale) +int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale) { struct perf_counts_values count; size_t nv = scale ? 3 : 1; - if (FD(evsel, cpu, thread) < 0) + if (FD(evsel, cpu_map_idx, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) + if (evsel->counts == NULL && evsel__alloc_counts(evsel) < 0) return -ENOMEM; - if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) + if (readn(FD(evsel, cpu_map_idx, thread), &count, nv * sizeof(u64)) <= 0) return -errno; - evsel__compute_deltas(evsel, cpu, thread, &count); + evsel__compute_deltas(evsel, cpu_map_idx, thread, &count); perf_counts_values__scale(&count, scale, NULL); - *perf_counts(evsel->counts, cpu, thread) = count; + *perf_counts(evsel->counts, cpu_map_idx, thread) = count; return 0; } static int evsel__match_other_cpu(struct evsel *evsel, struct evsel *other, - int cpu) + int cpu_map_idx) { - int cpuid; + struct perf_cpu cpu; - cpuid = perf_cpu_map__cpu(evsel->core.cpus, cpu); - return perf_cpu_map__idx(other->core.cpus, cpuid); + cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); + return perf_cpu_map__idx(other->core.cpus, cpu); } -static int evsel__hybrid_group_cpu(struct 
evsel *evsel, int cpu) +static int evsel__hybrid_group_cpu_map_idx(struct evsel *evsel, int cpu_map_idx) { struct evsel *leader = evsel__leader(evsel); if ((evsel__is_hybrid(evsel) && !evsel__is_hybrid(leader)) || (!evsel__is_hybrid(evsel) && evsel__is_hybrid(leader))) { - return evsel__match_other_cpu(evsel, leader, cpu); + return evsel__match_other_cpu(evsel, leader, cpu_map_idx); } - return cpu; + return cpu_map_idx; } -static int get_group_fd(struct evsel *evsel, int cpu, int thread) +static int get_group_fd(struct evsel *evsel, int cpu_map_idx, int thread) { struct evsel *leader = evsel__leader(evsel); int fd; @@ -1644,11 +1626,11 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread) */ BUG_ON(!leader->core.fd); - cpu = evsel__hybrid_group_cpu(evsel, cpu); - if (cpu == -1) + cpu_map_idx = evsel__hybrid_group_cpu_map_idx(evsel, cpu_map_idx); + if (cpu_map_idx == -1) return -1; - fd = FD(leader, cpu, thread); + fd = FD(leader, cpu_map_idx, thread); BUG_ON(fd == -1); return fd; @@ -1662,16 +1644,16 @@ static void evsel__remove_fd(struct evsel *pos, int nr_cpus, int nr_threads, int } static int update_fds(struct evsel *evsel, - int nr_cpus, int cpu_idx, + int nr_cpus, int cpu_map_idx, int nr_threads, int thread_idx) { struct evsel *pos; - if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + if (cpu_map_idx >= nr_cpus || thread_idx >= nr_threads) return -EINVAL; evlist__for_each_entry(evsel->evlist, pos) { - nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + nr_cpus = pos != evsel ? 
nr_cpus : cpu_map_idx; evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); @@ -1685,10 +1667,10 @@ static int update_fds(struct evsel *evsel, return 0; } -bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, - struct perf_thread_map *threads, - int thread, int err) +static bool evsel__ignore_missing_thread(struct evsel *evsel, + int nr_cpus, int cpu_map_idx, + struct perf_thread_map *threads, + int thread, int err) { pid_t ignore_pid = perf_thread_map__pid(threads, thread); @@ -1711,7 +1693,7 @@ bool evsel__ignore_missing_thread(struct evsel *evsel, * We should remove fd for missing_thread first * because thread_map__remove() will decrease threads->nr. */ - if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + if (update_fds(evsel, nr_cpus, cpu_map_idx, threads->nr, thread)) return false; if (thread_map__remove(threads, thread)) @@ -1993,9 +1975,9 @@ bool evsel__increase_rlimit(enum rlimit_action *set_rlimit) static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, - int start_cpu, int end_cpu) + int start_cpu_map_idx, int end_cpu_map_idx) { - int cpu, thread, nthreads; + int idx, thread, nthreads; int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; @@ -2022,7 +2004,7 @@ fallback_missing_features: display_attr(&evsel->core.attr); - for (cpu = start_cpu; cpu < end_cpu; cpu++) { + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { for (thread = 0; thread < nthreads; thread++) { int fd, group_fd; @@ -2033,17 +2015,17 @@ retry_open: if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - group_fd = get_group_fd(evsel, cpu, thread); + group_fd = get_group_fd(evsel, idx, thread); test_attr__ready(); pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[cpu], group_fd, evsel->open_flags); + pid, cpus->map[idx].cpu, group_fd, evsel->open_flags); - fd = 
sys_perf_event_open(&evsel->core.attr, pid, cpus->map[cpu], + fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[idx].cpu, group_fd, evsel->open_flags); - FD(evsel, cpu, thread) = fd; + FD(evsel, idx, thread) = fd; if (fd < 0) { err = -errno; @@ -2053,10 +2035,10 @@ retry_open: goto try_fallback; } - bpf_counter__install_pe(evsel, cpu, fd); + bpf_counter__install_pe(evsel, idx, fd); if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], + test_attr__open(&evsel->core.attr, pid, cpus->map[idx], fd, group_fd, evsel->open_flags); } @@ -2097,7 +2079,7 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { + if (evsel__ignore_missing_thread(evsel, cpus->nr, idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ nthreads--; @@ -2112,7 +2094,7 @@ try_fallback: if (err == -EMFILE && evsel__increase_rlimit(&set_rlimit)) goto retry_open; - if (err != -EINVAL || cpu > 0 || thread > 0) + if (err != -EINVAL || idx > 0 || thread > 0) goto out_close; if (evsel__detect_missing_features(evsel)) @@ -2124,12 +2106,12 @@ out_close: old_errno = errno; do { while (--thread >= 0) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; } thread = nthreads; - } while (--cpu >= 0); + } while (--idx >= 0); errno = old_errno; return err; } @@ -2146,13 +2128,13 @@ void evsel__close(struct evsel *evsel) perf_evsel__free_id(&evsel->core); } -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu) +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) { - if (cpu == -1) + if (cpu_map_idx == -1) return evsel__open_cpu(evsel, cpus, NULL, 0, cpus ? 
cpus->nr : 1); - return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1); + return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); } int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads) @@ -2706,6 +2688,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } return sample->raw_data + offset; @@ -2950,6 +2934,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "wrong clockid (%d).", clockid); if (perf_missing_features.aux_output) return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel."); + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Invalid event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case ENODATA: return scnprintf(msg, size, "Cannot collect data source with the load latency event alone. 
" @@ -2973,15 +2961,15 @@ struct perf_env *evsel__env(struct evsel *evsel) static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) { - int cpu, thread; + int cpu_map_idx, thread; - for (cpu = 0; cpu < xyarray__max_x(evsel->core.fd); cpu++) { + for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { - int fd = FD(evsel, cpu, thread); + int fd = FD(evsel, cpu_map_idx, thread); if (perf_evlist__id_add_fd(&evlist->core, &evsel->core, - cpu, thread, fd) < 0) + cpu_map_idx, thread, fd) < 0) return -1; } } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 29d49a8c1e92..5720ceebffac 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -121,7 +121,6 @@ struct evsel { bool errored; struct hashmap *per_pkg_mask; int err; - int cpu_iter; struct { evsel__sb_cb_t *cb; void *data; @@ -195,9 +194,6 @@ static inline int evsel__nr_cpus(struct evsel *evsel) return evsel__cpus(evsel)->nr; } -void perf_counts_values__scale(struct perf_counts_values *count, - bool scale, s8 *pscaled); - void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, struct perf_counts_values *count); @@ -288,12 +284,12 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); int evsel__append_addr_filter(struct evsel *evsel, const char *filter); -int evsel__enable_cpu(struct evsel *evsel, int cpu); +int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx); int evsel__enable(struct evsel *evsel); int evsel__disable(struct evsel *evsel); -int evsel__disable_cpu(struct evsel *evsel, int cpu); +int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx); -int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu); +int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, 
int cpu_map_idx); int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads); int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); @@ -305,10 +301,6 @@ bool evsel__detect_missing_features(struct evsel *evsel); enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX }; bool evsel__increase_rlimit(enum rlimit_action *set_rlimit); -bool evsel__ignore_missing_thread(struct evsel *evsel, - int nr_cpus, int cpu, - struct perf_thread_map *threads, - int thread, int err); bool evsel__precise_ip_fallback(struct evsel *evsel); struct perf_sample; @@ -337,32 +329,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) (e1->core.attr.config == e2->core.attr.config); } -int evsel__read_counter(struct evsel *evsel, int cpu, int thread); +int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); -int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale); +int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); /** * evsel__read_on_cpu - Read out the results on a CPU and thread * * @evsel - event selector to read value - * @cpu - CPU of interest + * @cpu_map_idx - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread) +static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread) { - return __evsel__read_on_cpu(evsel, cpu, thread, false); + return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false); } /** * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled * * @evsel - event selector to read value - * @cpu - CPU of interest + * @cpu_map_idx - CPU of interest * @thread - thread of interest */ -static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread) +static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread) { - return 
__evsel__read_on_cpu(evsel, cpu, thread, true); + return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } int evsel__parse_sample(struct evsel *evsel, union perf_event *event, diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 666b59baeb70..675f318ce7c1 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -405,12 +405,17 @@ double expr_id_data__source_count(const struct expr_id_data *data) double expr__get_literal(const char *literal) { static struct cpu_topology *topology; + double result = NAN; - if (!strcmp("#smt_on", literal)) - return smt_on() > 0 ? 1.0 : 0.0; + if (!strcasecmp("#smt_on", literal)) { + result = smt_on() > 0 ? 1.0 : 0.0; + goto out; + } - if (!strcmp("#num_cpus", literal)) - return cpu__max_present_cpu(); + if (!strcmp("#num_cpus", literal)) { + result = cpu__max_present_cpu().cpu; + goto out; + } /* * Assume that topology strings are consistent, such as CPUs "0-1" @@ -422,16 +427,24 @@ double expr__get_literal(const char *literal) topology = cpu_topology__new(); if (!topology) { pr_err("Error creating CPU topology"); - return NAN; + goto out; } } - if (!strcmp("#num_packages", literal)) - return topology->package_cpus_lists; - if (!strcmp("#num_dies", literal)) - return topology->die_cpus_lists; - if (!strcmp("#num_cores", literal)) - return topology->core_cpus_lists; + if (!strcmp("#num_packages", literal)) { + result = topology->package_cpus_lists; + goto out; + } + if (!strcmp("#num_dies", literal)) { + result = topology->die_cpus_lists; + goto out; + } + if (!strcmp("#num_cores", literal)) { + result = topology->core_cpus_lists; + goto out; + } pr_err("Unrecognized literal '%s'", literal); - return NAN; +out: + pr_debug2("literal: %s = %f\n", literal, result); + return result; } diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h new file mode 100644 index 000000000000..887f68a185f7 --- /dev/null +++ b/tools/perf/util/ftrace.h @@ -0,0 +1,81 @@ +#ifndef __PERF_FTRACE_H__ +#define 
__PERF_FTRACE_H__ + +#include <linux/list.h> + +#include "target.h" + +struct evlist; + +struct perf_ftrace { + struct evlist *evlist; + struct target target; + const char *tracer; + struct list_head filters; + struct list_head notrace; + struct list_head graph_funcs; + struct list_head nograph_funcs; + unsigned long percpu_buffer_size; + bool inherit; + int graph_depth; + int func_stack_trace; + int func_irq_info; + int graph_nosleep_time; + int graph_noirqs; + int graph_verbose; + int graph_thresh; + unsigned int initial_delay; +}; + +struct filter_entry { + struct list_head list; + char name[]; +}; + +#define NUM_BUCKET 22 /* 20 + 2 (for outliers in both direction) */ + +#ifdef HAVE_BPF_SKEL + +int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace); +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, + int buckets[]); +int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace); + +#else /* !HAVE_BPF_SKEL */ + +static inline int +perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, + int buckets[] __maybe_unused) +{ + return -1; +} + +static inline int +perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace __maybe_unused) +{ + return -1; +} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* __PERF_FTRACE_H__ */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e3c1a532d059..6da12e522edc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -472,7 +472,7 @@ static int write_nrcpus(struct feat_fd *ff, u32 nrc, nra; 
int ret; - nrc = cpu__max_present_cpu(); + nrc = cpu__max_present_cpu().cpu; nr = sysconf(_SC_NPROCESSORS_ONLN); if (nr < 0) @@ -1163,7 +1163,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) u32 nr, cpu; u16 level; - nr = cpu__max_cpu(); + nr = cpu__max_cpu().cpu; for (cpu = 0; cpu < nr; cpu++) { for (level = 0; level < MAX_CACHE_LVL; level++) { @@ -1195,7 +1195,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 *cntp) static int write_cache(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - u32 max_caches = cpu__max_cpu() * MAX_CACHE_LVL; + u32 max_caches = cpu__max_cpu().cpu * MAX_CACHE_LVL; struct cpu_cache_level caches[max_caches]; u32 cnt = 0, i, version = 1; int ret; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b776465e04ef..0a8033b09e28 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,7 +211,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); - hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); + hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); + if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 621f35ae1efa..2a15e22fb89c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,7 +75,8 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, - HISTC_P_STAGE_CYC, + HISTC_LOCAL_P_STAGE_CYC, + HISTC_GLOBAL_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index c397be0c2e32..15f60fd09424 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -23,7 +23,9 @@ #include "unwind.h" #include 
"libunwind-aarch64.h" +#define perf_event_arm_regs perf_event_arm64_regs #include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#undef perf_event_arm_regs #include "../../arch/arm64/util/unwind-libunwind.c" /* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fb8496df8432..3901440aeff9 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -34,6 +34,7 @@ #include "bpf-event.h" #include <internal/lib.h> // page_size #include "cgroup.h" +#include "arm64-frame-pointer-unwind-support.h" #include <linux/ctype.h> #include <symbol/kallsyms.h> @@ -2710,6 +2711,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, return err; } +static u64 get_leaf_frame_caller(struct perf_sample *sample, + struct thread *thread, int usr_idx) +{ + if (machine__normalized_is(thread->maps->machine, "arm64")) + return get_leaf_frame_caller_aarch64(sample, thread, usr_idx); + else + return 0; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct evsel *evsel, @@ -2723,9 +2733,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err, nr_entries; + int i, j, err, nr_entries, usr_idx; int skip_idx = -1; int first_call = 0; + u64 leaf_frame_caller; if (chain) chain_nr = chain->nr; @@ -2850,6 +2861,34 @@ check_calls: continue; } + /* + * PERF_CONTEXT_USER allows us to locate where the user stack ends. + * Depending on callchain_param.order and the position of PERF_CONTEXT_USER, + * the index will be different in order to add the missing frame + * at the right place. + */ + + usr_idx = callchain_param.order == ORDER_CALLEE ? 
j-2 : j-1; + + if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) { + + leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx); + + /* + * check if leaf_frame_Caller != ip to not add the same + * value twice. + */ + + if (leaf_frame_caller && leaf_frame_caller != ip) { + + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, leaf_frame_caller, + false, NULL, NULL, 0); + if (err) + return (err < 0) ? err : 0; + } + } + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, false, NULL, NULL, 0); @@ -3079,14 +3118,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, } /* - * Compares the raw arch string. N.B. see instead perf_env__arch() if a - * normalized arch is needed. + * Compares the raw arch string. N.B. see instead perf_env__arch() or + * machine__normalized_is() if a normalized arch is needed. */ bool machine__is(struct machine *machine, const char *arch) { return machine && !strcmp(perf_env__raw_arch(machine->env), arch); } +bool machine__normalized_is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__arch(machine->env), arch); +} + int machine__nr_cpus_avail(struct machine *machine) { return machine ? 
perf_env__nr_cpus_avail(machine->env) : 0; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a143087eeb47..c5a45dc8df4c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -208,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine) } bool machine__is(struct machine *machine, const char *arch); +bool machine__normalized_is(struct machine *machine, const char *arch); int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 3167b4628b6d..ed0ab838bcc5 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -309,6 +309,9 @@ static const char * const mem_hops[] = { * to be set with mem_hops field. */ "core, same node", + "node, same socket", + "socket, same board", + "board", }; int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) @@ -316,7 +319,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) size_t i, l = 0; u64 m = PERF_MEM_LVL_NA; u64 hit, miss; - int printed; + int printed = 0; if (mem_info) m = mem_info->data_src.mem_lvl; @@ -335,18 +338,22 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) l += 7; } - if (mem_info && mem_info->data_src.mem_hops) + /* + * Incase mem_hops field is set, we can skip printing data source via + * PERF_MEM_LVL namespace. 
+ */ + if (mem_info && mem_info->data_src.mem_hops) { l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); - - printed = 0; - for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (printed++) { - strcat(out, " or "); - l += 4; + } else { + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (printed++) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, mem_lvl[i]); } - l += scnprintf(out + l, sz - l, mem_lvl[i]); } if (mem_info && mem_info->data_src.mem_lvl_num) { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index fffe02aae3ed..d8492e339521 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -209,8 +209,8 @@ static struct metric *metric__new(const struct pmu_event *pe, m->metric_name = pe->metric_name; m->modifier = modifier ? strdup(modifier) : NULL; if (modifier && !m->modifier) { - free(m); expr__ctx_free(m->pctx); + free(m); return NULL; } m->metric_expr = pe->metric_expr; @@ -314,7 +314,7 @@ static int setup_metric_events(struct hashmap *ids, */ metric_id = evsel__metric_id(ev); evlist__for_each_entry_continue(metric_evlist, ev) { - if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) + if (!strcmp(evsel__metric_id(ev), metric_id)) ev->metric_leader = metric_events[i]; } } @@ -1115,13 +1115,27 @@ out: return ret; } +/** + * metric_list_cmp - list_sort comparator that sorts metrics with more events to + * the front. duration_time is excluded from the count. 
+ */ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, const struct list_head *r) { const struct metric *left = container_of(l, struct metric, nd); const struct metric *right = container_of(r, struct metric, nd); + struct expr_id_data *data; + int left_count, right_count; + + left_count = hashmap__size(left->pctx->ids); + if (!expr__get_id(left->pctx, "duration_time", &data)) + left_count--; + + right_count = hashmap__size(right->pctx->ids); + if (!expr__get_id(right->pctx, "duration_time", &data)) + right_count--; - return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids); + return right_count - left_count; } /** @@ -1299,14 +1313,16 @@ err_out: /** * parse_ids - Build the event string for the ids and parse them creating an * evlist. The encoded metric_ids are decoded. + * @metric_no_merge: is metric sharing explicitly disabled. * @fake_pmu: used when testing metrics not supported by the current CPU. * @ids: the event identifiers parsed from a metric. * @modifier: any modifiers added to the events. * @has_constraint: false if events should be placed in a weak group. * @out_evlist: the created list of events. */ -static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, - const char *modifier, bool has_constraint, struct evlist **out_evlist) +static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu, + struct expr_parse_ctx *ids, const char *modifier, + bool has_constraint, struct evlist **out_evlist) { struct parse_events_error parse_error; struct evlist *parsed_evlist; @@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, int ret; *out_evlist = NULL; - if (hashmap__size(ids->ids) == 0) { + if (!metric_no_merge || hashmap__size(ids->ids) == 0) { char *tmp; /* - * No ids/events in the expression parsing context. 
Events may - * have been removed because of constant evaluation, e.g.: - * event1 if #smt_on else 0 + * We may fail to share events between metrics because + * duration_time isn't present in one metric. For example, a + * ratio of cache misses doesn't need duration_time but the same + * events may be used for a misses per second. Events without + * sharing implies multiplexing, that is best avoided, so place + * duration_time in every group. + * + * Also, there may be no ids/events in the expression parsing + * context because of constant evaluation, e.g.: + * event1 if #smt_on else 0 * Add a duration_time event to avoid a parse error on an empty * string. */ @@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, ret = build_combined_expr_ctx(&metric_list, &combined); if (!ret && combined && hashmap__size(combined->ids)) { - ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL, + ret = parse_ids(metric_no_merge, fake_pmu, combined, + /*modifier=*/NULL, /*has_constraint=*/true, &combined_evlist); } @@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } } if (!metric_evlist) { - ret = parse_ids(fake_pmu, m->pctx, m->modifier, + ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier, m->has_constraint, &m->evlist); if (ret) goto out; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 23ecdba9e670..12261ed8c15b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -94,7 +94,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } } -static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) +static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, int affinity) { void *data; size_t mmap_len; @@ -138,7 +138,7 @@ static void perf_mmap__aio_free(struct mmap *map, int idx) } static int perf_mmap__aio_bind(struct mmap *map __maybe_unused, int idx __maybe_unused, - int cpu __maybe_unused, int affinity 
__maybe_unused) + struct perf_cpu cpu __maybe_unused, int affinity __maybe_unused) { return 0; } @@ -240,7 +240,8 @@ void mmap__munmap(struct mmap *map) static void build_node_mask(int node, struct mmap_cpu_mask *mask) { - int c, cpu, nr_cpus; + int idx, nr_cpus; + struct perf_cpu cpu; const struct perf_cpu_map *cpu_map = NULL; cpu_map = cpu_map__online(); @@ -248,16 +249,16 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) return; nr_cpus = perf_cpu_map__nr(cpu_map); - for (c = 0; c < nr_cpus; c++) { - cpu = cpu_map->map[c]; /* map c index to online cpu index */ + for (idx = 0; idx < nr_cpus; idx++) { + cpu = cpu_map->map[idx]; /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - set_bit(cpu, mask->bits); + set_bit(cpu.cpu, mask->bits); } } static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { - map->affinity_mask.nbits = cpu__max_cpu(); + map->affinity_mask.nbits = cpu__max_cpu().cpu; map->affinity_mask.bits = bitmap_zalloc(map->affinity_mask.nbits); if (!map->affinity_mask.bits) return -1; @@ -265,12 +266,12 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params * if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - set_bit(map->core.cpu, map->affinity_mask.bits); + set_bit(map->core.cpu.cpu, map->affinity_mask.bits); return 0; } -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu) { if (perf_mmap__mmap(&map->core, &mp->core, fd, cpu)) { pr_debug2("failed to mmap perf event ring buffer, error %d\n", diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 8e259b9610f8..83f6bd4d4082 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/ring_buffer.h> #include 
<linux/bitops.h> +#include <perf/cpumap.h> #include <stdbool.h> #include <pthread.h> // for cpu_set_t #ifdef HAVE_AIO_SUPPORT @@ -52,7 +53,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu); +int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu); void mmap__munmap(struct mmap *map); union perf_event *perf_mmap__read_forward(struct mmap *map); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 608b20c72a5c..48aa3217300b 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -60,17 +60,49 @@ void namespaces__free(struct namespaces *namespaces) free(namespaces); } +static int nsinfo__get_nspid(struct nsinfo *nsi, const char *path) +{ + FILE *f = NULL; + char *statln = NULL; + size_t linesz = 0; + char *nspid; + + f = fopen(path, "r"); + if (f == NULL) + return -1; + + while (getline(&statln, &linesz, f) != -1) { + /* Use tgid if CONFIG_PID_NS is not defined. */ + if (strstr(statln, "Tgid:") != NULL) { + nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), + NULL, 10); + nsi->nstgid = nsi->tgid; + } + + if (strstr(statln, "NStgid:") != NULL) { + nspid = strrchr(statln, '\t'); + nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); + /* + * If innermost tgid is not the first, process is in a different + * PID namespace. 
+ */ + nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; + break; + } + } + + fclose(f); + free(statln); + return 0; +} + int nsinfo__init(struct nsinfo *nsi) { char oldns[PATH_MAX]; char spath[PATH_MAX]; char *newns = NULL; - char *statln = NULL; - char *nspid; struct stat old_stat; struct stat new_stat; - FILE *f = NULL; - size_t linesz = 0; int rv = -1; if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) @@ -100,34 +132,9 @@ int nsinfo__init(struct nsinfo *nsi) if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX) goto out; - f = fopen(spath, "r"); - if (f == NULL) - goto out; - - while (getline(&statln, &linesz, f) != -1) { - /* Use tgid if CONFIG_PID_NS is not defined. */ - if (strstr(statln, "Tgid:") != NULL) { - nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'), - NULL, 10); - nsi->nstgid = nsi->tgid; - } - - if (strstr(statln, "NStgid:") != NULL) { - nspid = strrchr(statln, '\t'); - nsi->nstgid = (pid_t)strtol(nspid, NULL, 10); - /* If innermost tgid is not the first, process is in a different - * PID namespace. 
- */ - nsi->in_pidns = (statln + sizeof("NStgid:") - 1) != nspid; - break; - } - } - rv = 0; + rv = nsinfo__get_nspid(nsi, spath); out: - if (f != NULL) - (void) fclose(f); - free(statln); free(newns); return rv; } @@ -299,3 +306,12 @@ int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi) return ret; } + +bool nsinfo__is_in_root_namespace(void) +{ + struct nsinfo nsi; + + memset(&nsi, 0x0, sizeof(nsi)); + nsinfo__get_nspid(&nsi, "/proc/self/status"); + return !nsi.in_pidns; +} diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index ad9775db7b9c..9ceea9643507 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -59,6 +59,8 @@ void nsinfo__mountns_exit(struct nscookie *nc); char *nsinfo__realpath(const char *path, struct nsinfo *nsi); int nsinfo__stat(const char *filename, struct stat *st, struct nsinfo *nsi); +bool nsinfo__is_in_root_namespace(void); + static inline void __nsinfo__zput(struct nsinfo **nsip) { if (nsip) { diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 9fc86971027b..284f8eabd3b9 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -63,10 +63,13 @@ static int create_event_hybrid(__u32 config_type, int *idx, static int pmu_cmp(struct parse_events_state *parse_state, struct perf_pmu *pmu) { - if (!parse_state->hybrid_pmu_name) - return 0; + if (parse_state->evlist && parse_state->evlist->hybrid_pmu_name) + return strcmp(parse_state->evlist->hybrid_pmu_name, pmu->name); + + if (parse_state->hybrid_pmu_name) + return strcmp(parse_state->hybrid_pmu_name, pmu->name); - return strcmp(parse_state->hybrid_pmu_name, pmu->name); + return 0; } static int add_hw_hybrid(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ba74fdf74af9..acf20ce98ce9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ 
-1824,6 +1824,11 @@ out: return ret; } +__weak struct evsel *arch_evlist__leader(struct list_head *list) +{ + return list_first_entry(list, struct evsel, core.node); +} + void parse_events__set_leader(char *name, struct list_head *list, struct parse_events_state *parse_state) { @@ -1837,9 +1842,10 @@ void parse_events__set_leader(char *name, struct list_head *list, if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state)) return; - __perf_evlist__set_leader(list); - leader = list_entry(list->next, struct evsel, core.node); + leader = arch_evlist__leader(list); + __perf_evlist__set_leader(list, &leader->core); leader->group_name = name ? strdup(name) : NULL; + list_move(&leader->core.node, list); } /* list_event is assumed to point to malloc'ed memory */ diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 020411682a3c..734d006d9a8c 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -11,7 +11,7 @@ typedef void (*setup_probe_fn_t)(struct evsel *evsel); -static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) +static int perf_do_probe_api(setup_probe_fn_t fn, struct perf_cpu cpu, const char *str) { struct evlist *evlist; struct evsel *evsel; @@ -29,7 +29,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) evsel = evlist__first(evlist); while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); if (fd < 0) { if (pid == -1 && errno == EACCES) { pid = 0; @@ -43,7 +43,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) fn(evsel); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags); + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, flags); if (fd < 0) { if (errno == EINVAL) err = -EINVAL; @@ -61,7 +61,8 @@ static bool perf_probe_api(setup_probe_fn_t fn) { const char *try[] = {"cycles:u", 
"instructions:u", "cpu-clock:u", NULL}; struct perf_cpu_map *cpus; - int cpu, ret, i = 0; + struct perf_cpu cpu; + int ret, i = 0; cpus = perf_cpu_map__new(NULL); if (!cpus) @@ -136,15 +137,17 @@ bool perf_can_record_cpu_wide(void) .exclude_kernel = 1, }; struct perf_cpu_map *cpus; - int cpu, fd; + struct perf_cpu cpu; + int fd; cpus = perf_cpu_map__new(NULL); if (!cpus) return false; + cpu = cpus->map[0]; perf_cpu_map__put(cpus); - fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); + fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0); if (fd < 0) return false; close(fd); diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 06a7461ba864..a982e40ee5a9 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <errno.h> +#include <string.h> #include "perf_regs.h" #include "event.h" @@ -20,6 +21,671 @@ uint64_t __weak arch__user_reg_mask(void) } #ifdef HAVE_PERF_REGS_SUPPORT + +#define perf_event_arm_regs perf_event_arm64_regs +#include "../../arch/arm64/include/uapi/asm/perf_regs.h" +#undef perf_event_arm_regs + +#include "../../arch/arm/include/uapi/asm/perf_regs.h" +#include "../../arch/csky/include/uapi/asm/perf_regs.h" +#include "../../arch/mips/include/uapi/asm/perf_regs.h" +#include "../../arch/powerpc/include/uapi/asm/perf_regs.h" +#include "../../arch/riscv/include/uapi/asm/perf_regs.h" +#include "../../arch/s390/include/uapi/asm/perf_regs.h" +#include "../../arch/x86/include/uapi/asm/perf_regs.h" + +static const char *__perf_reg_name_arm64(int id) +{ + switch (id) { + case PERF_REG_ARM64_X0: + return "x0"; + case PERF_REG_ARM64_X1: + return "x1"; + case PERF_REG_ARM64_X2: + return "x2"; + case PERF_REG_ARM64_X3: + return "x3"; + case PERF_REG_ARM64_X4: + return "x4"; + case PERF_REG_ARM64_X5: + return "x5"; + case PERF_REG_ARM64_X6: + return "x6"; + case PERF_REG_ARM64_X7: + return "x7"; + case PERF_REG_ARM64_X8: + return "x8"; + case PERF_REG_ARM64_X9: + 
return "x9"; + case PERF_REG_ARM64_X10: + return "x10"; + case PERF_REG_ARM64_X11: + return "x11"; + case PERF_REG_ARM64_X12: + return "x12"; + case PERF_REG_ARM64_X13: + return "x13"; + case PERF_REG_ARM64_X14: + return "x14"; + case PERF_REG_ARM64_X15: + return "x15"; + case PERF_REG_ARM64_X16: + return "x16"; + case PERF_REG_ARM64_X17: + return "x17"; + case PERF_REG_ARM64_X18: + return "x18"; + case PERF_REG_ARM64_X19: + return "x19"; + case PERF_REG_ARM64_X20: + return "x20"; + case PERF_REG_ARM64_X21: + return "x21"; + case PERF_REG_ARM64_X22: + return "x22"; + case PERF_REG_ARM64_X23: + return "x23"; + case PERF_REG_ARM64_X24: + return "x24"; + case PERF_REG_ARM64_X25: + return "x25"; + case PERF_REG_ARM64_X26: + return "x26"; + case PERF_REG_ARM64_X27: + return "x27"; + case PERF_REG_ARM64_X28: + return "x28"; + case PERF_REG_ARM64_X29: + return "x29"; + case PERF_REG_ARM64_SP: + return "sp"; + case PERF_REG_ARM64_LR: + return "lr"; + case PERF_REG_ARM64_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_arm(int id) +{ + switch (id) { + case PERF_REG_ARM_R0: + return "r0"; + case PERF_REG_ARM_R1: + return "r1"; + case PERF_REG_ARM_R2: + return "r2"; + case PERF_REG_ARM_R3: + return "r3"; + case PERF_REG_ARM_R4: + return "r4"; + case PERF_REG_ARM_R5: + return "r5"; + case PERF_REG_ARM_R6: + return "r6"; + case PERF_REG_ARM_R7: + return "r7"; + case PERF_REG_ARM_R8: + return "r8"; + case PERF_REG_ARM_R9: + return "r9"; + case PERF_REG_ARM_R10: + return "r10"; + case PERF_REG_ARM_FP: + return "fp"; + case PERF_REG_ARM_IP: + return "ip"; + case PERF_REG_ARM_SP: + return "sp"; + case PERF_REG_ARM_LR: + return "lr"; + case PERF_REG_ARM_PC: + return "pc"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_csky(int id) +{ + switch (id) { + case PERF_REG_CSKY_A0: + return "a0"; + case PERF_REG_CSKY_A1: + return "a1"; + case PERF_REG_CSKY_A2: + return "a2"; + case 
PERF_REG_CSKY_A3: + return "a3"; + case PERF_REG_CSKY_REGS0: + return "regs0"; + case PERF_REG_CSKY_REGS1: + return "regs1"; + case PERF_REG_CSKY_REGS2: + return "regs2"; + case PERF_REG_CSKY_REGS3: + return "regs3"; + case PERF_REG_CSKY_REGS4: + return "regs4"; + case PERF_REG_CSKY_REGS5: + return "regs5"; + case PERF_REG_CSKY_REGS6: + return "regs6"; + case PERF_REG_CSKY_REGS7: + return "regs7"; + case PERF_REG_CSKY_REGS8: + return "regs8"; + case PERF_REG_CSKY_REGS9: + return "regs9"; + case PERF_REG_CSKY_SP: + return "sp"; + case PERF_REG_CSKY_LR: + return "lr"; + case PERF_REG_CSKY_PC: + return "pc"; +#if defined(__CSKYABIV2__) + case PERF_REG_CSKY_EXREGS0: + return "exregs0"; + case PERF_REG_CSKY_EXREGS1: + return "exregs1"; + case PERF_REG_CSKY_EXREGS2: + return "exregs2"; + case PERF_REG_CSKY_EXREGS3: + return "exregs3"; + case PERF_REG_CSKY_EXREGS4: + return "exregs4"; + case PERF_REG_CSKY_EXREGS5: + return "exregs5"; + case PERF_REG_CSKY_EXREGS6: + return "exregs6"; + case PERF_REG_CSKY_EXREGS7: + return "exregs7"; + case PERF_REG_CSKY_EXREGS8: + return "exregs8"; + case PERF_REG_CSKY_EXREGS9: + return "exregs9"; + case PERF_REG_CSKY_EXREGS10: + return "exregs10"; + case PERF_REG_CSKY_EXREGS11: + return "exregs11"; + case PERF_REG_CSKY_EXREGS12: + return "exregs12"; + case PERF_REG_CSKY_EXREGS13: + return "exregs13"; + case PERF_REG_CSKY_EXREGS14: + return "exregs14"; + case PERF_REG_CSKY_TLS: + return "tls"; + case PERF_REG_CSKY_HI: + return "hi"; + case PERF_REG_CSKY_LO: + return "lo"; +#endif + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_mips(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + 
return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +static const char *__perf_reg_name_powerpc(int id) +{ + switch (id) { + case PERF_REG_POWERPC_R0: + return "r0"; + case PERF_REG_POWERPC_R1: + return "r1"; + case PERF_REG_POWERPC_R2: + return "r2"; + case PERF_REG_POWERPC_R3: + return "r3"; + case PERF_REG_POWERPC_R4: + return "r4"; + case PERF_REG_POWERPC_R5: + return "r5"; + case PERF_REG_POWERPC_R6: + return "r6"; + case PERF_REG_POWERPC_R7: + return "r7"; + case PERF_REG_POWERPC_R8: + return "r8"; + case PERF_REG_POWERPC_R9: + return "r9"; + case PERF_REG_POWERPC_R10: + return "r10"; + case PERF_REG_POWERPC_R11: + return "r11"; + case PERF_REG_POWERPC_R12: + return "r12"; + case PERF_REG_POWERPC_R13: + return "r13"; + case PERF_REG_POWERPC_R14: + return "r14"; + case PERF_REG_POWERPC_R15: + return "r15"; + case PERF_REG_POWERPC_R16: + return "r16"; + case PERF_REG_POWERPC_R17: + return "r17"; + case PERF_REG_POWERPC_R18: + return "r18"; + case PERF_REG_POWERPC_R19: + return "r19"; + case PERF_REG_POWERPC_R20: + return "r20"; + case PERF_REG_POWERPC_R21: + return "r21"; + case 
PERF_REG_POWERPC_R22: + return "r22"; + case PERF_REG_POWERPC_R23: + return "r23"; + case PERF_REG_POWERPC_R24: + return "r24"; + case PERF_REG_POWERPC_R25: + return "r25"; + case PERF_REG_POWERPC_R26: + return "r26"; + case PERF_REG_POWERPC_R27: + return "r27"; + case PERF_REG_POWERPC_R28: + return "r28"; + case PERF_REG_POWERPC_R29: + return "r29"; + case PERF_REG_POWERPC_R30: + return "r30"; + case PERF_REG_POWERPC_R31: + return "r31"; + case PERF_REG_POWERPC_NIP: + return "nip"; + case PERF_REG_POWERPC_MSR: + return "msr"; + case PERF_REG_POWERPC_ORIG_R3: + return "orig_r3"; + case PERF_REG_POWERPC_CTR: + return "ctr"; + case PERF_REG_POWERPC_LINK: + return "link"; + case PERF_REG_POWERPC_XER: + return "xer"; + case PERF_REG_POWERPC_CCR: + return "ccr"; + case PERF_REG_POWERPC_SOFTE: + return "softe"; + case PERF_REG_POWERPC_TRAP: + return "trap"; + case PERF_REG_POWERPC_DAR: + return "dar"; + case PERF_REG_POWERPC_DSISR: + return "dsisr"; + case PERF_REG_POWERPC_SIER: + return "sier"; + case PERF_REG_POWERPC_MMCRA: + return "mmcra"; + case PERF_REG_POWERPC_MMCR0: + return "mmcr0"; + case PERF_REG_POWERPC_MMCR1: + return "mmcr1"; + case PERF_REG_POWERPC_MMCR2: + return "mmcr2"; + case PERF_REG_POWERPC_MMCR3: + return "mmcr3"; + case PERF_REG_POWERPC_SIER2: + return "sier2"; + case PERF_REG_POWERPC_SIER3: + return "sier3"; + case PERF_REG_POWERPC_PMC1: + return "pmc1"; + case PERF_REG_POWERPC_PMC2: + return "pmc2"; + case PERF_REG_POWERPC_PMC3: + return "pmc3"; + case PERF_REG_POWERPC_PMC4: + return "pmc4"; + case PERF_REG_POWERPC_PMC5: + return "pmc5"; + case PERF_REG_POWERPC_PMC6: + return "pmc6"; + case PERF_REG_POWERPC_SDAR: + return "sdar"; + case PERF_REG_POWERPC_SIAR: + return "siar"; + default: + break; + } + return NULL; +} + +static const char *__perf_reg_name_riscv(int id) +{ + switch (id) { + case PERF_REG_RISCV_PC: + return "pc"; + case PERF_REG_RISCV_RA: + return "ra"; + case PERF_REG_RISCV_SP: + return "sp"; + case PERF_REG_RISCV_GP: + return 
"gp"; + case PERF_REG_RISCV_TP: + return "tp"; + case PERF_REG_RISCV_T0: + return "t0"; + case PERF_REG_RISCV_T1: + return "t1"; + case PERF_REG_RISCV_T2: + return "t2"; + case PERF_REG_RISCV_S0: + return "s0"; + case PERF_REG_RISCV_S1: + return "s1"; + case PERF_REG_RISCV_A0: + return "a0"; + case PERF_REG_RISCV_A1: + return "a1"; + case PERF_REG_RISCV_A2: + return "a2"; + case PERF_REG_RISCV_A3: + return "a3"; + case PERF_REG_RISCV_A4: + return "a4"; + case PERF_REG_RISCV_A5: + return "a5"; + case PERF_REG_RISCV_A6: + return "a6"; + case PERF_REG_RISCV_A7: + return "a7"; + case PERF_REG_RISCV_S2: + return "s2"; + case PERF_REG_RISCV_S3: + return "s3"; + case PERF_REG_RISCV_S4: + return "s4"; + case PERF_REG_RISCV_S5: + return "s5"; + case PERF_REG_RISCV_S6: + return "s6"; + case PERF_REG_RISCV_S7: + return "s7"; + case PERF_REG_RISCV_S8: + return "s8"; + case PERF_REG_RISCV_S9: + return "s9"; + case PERF_REG_RISCV_S10: + return "s10"; + case PERF_REG_RISCV_S11: + return "s11"; + case PERF_REG_RISCV_T3: + return "t3"; + case PERF_REG_RISCV_T4: + return "t4"; + case PERF_REG_RISCV_T5: + return "t5"; + case PERF_REG_RISCV_T6: + return "t6"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_s390(int id) +{ + switch (id) { + case PERF_REG_S390_R0: + return "R0"; + case PERF_REG_S390_R1: + return "R1"; + case PERF_REG_S390_R2: + return "R2"; + case PERF_REG_S390_R3: + return "R3"; + case PERF_REG_S390_R4: + return "R4"; + case PERF_REG_S390_R5: + return "R5"; + case PERF_REG_S390_R6: + return "R6"; + case PERF_REG_S390_R7: + return "R7"; + case PERF_REG_S390_R8: + return "R8"; + case PERF_REG_S390_R9: + return "R9"; + case PERF_REG_S390_R10: + return "R10"; + case PERF_REG_S390_R11: + return "R11"; + case PERF_REG_S390_R12: + return "R12"; + case PERF_REG_S390_R13: + return "R13"; + case PERF_REG_S390_R14: + return "R14"; + case PERF_REG_S390_R15: + return "R15"; + case PERF_REG_S390_FP0: + return "FP0"; + case PERF_REG_S390_FP1: + 
return "FP1"; + case PERF_REG_S390_FP2: + return "FP2"; + case PERF_REG_S390_FP3: + return "FP3"; + case PERF_REG_S390_FP4: + return "FP4"; + case PERF_REG_S390_FP5: + return "FP5"; + case PERF_REG_S390_FP6: + return "FP6"; + case PERF_REG_S390_FP7: + return "FP7"; + case PERF_REG_S390_FP8: + return "FP8"; + case PERF_REG_S390_FP9: + return "FP9"; + case PERF_REG_S390_FP10: + return "FP10"; + case PERF_REG_S390_FP11: + return "FP11"; + case PERF_REG_S390_FP12: + return "FP12"; + case PERF_REG_S390_FP13: + return "FP13"; + case PERF_REG_S390_FP14: + return "FP14"; + case PERF_REG_S390_FP15: + return "FP15"; + case PERF_REG_S390_MASK: + return "MASK"; + case PERF_REG_S390_PC: + return "PC"; + default: + return NULL; + } + + return NULL; +} + +static const char *__perf_reg_name_x86(int id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; + +#define XMM(x) \ + case PERF_REG_X86_XMM ## x: \ + case PERF_REG_X86_XMM ## x + 1: \ + return "XMM" #x; + XMM(0) + XMM(1) + XMM(2) + XMM(3) + XMM(4) + XMM(5) + XMM(6) + XMM(7) + XMM(8) + XMM(9) + XMM(10) + XMM(11) + XMM(12) + 
XMM(13) + XMM(14) + XMM(15) +#undef XMM + default: + return NULL; + } + + return NULL; +} + +const char *perf_reg_name(int id, const char *arch) +{ + const char *reg_name = NULL; + + if (!strcmp(arch, "csky")) + reg_name = __perf_reg_name_csky(id); + else if (!strcmp(arch, "mips")) + reg_name = __perf_reg_name_mips(id); + else if (!strcmp(arch, "powerpc")) + reg_name = __perf_reg_name_powerpc(id); + else if (!strcmp(arch, "riscv")) + reg_name = __perf_reg_name_riscv(id); + else if (!strcmp(arch, "s390")) + reg_name = __perf_reg_name_s390(id); + else if (!strcmp(arch, "x86")) + reg_name = __perf_reg_name_x86(id); + else if (!strcmp(arch, "arm")) + reg_name = __perf_reg_name_arm(id); + else if (!strcmp(arch, "arm64")) + reg_name = __perf_reg_name_arm64(id); + + return reg_name ?: "unknown"; +} + int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { int i, idx = 0; diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index eeac181ebccf..ce1127af05e4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -11,8 +11,11 @@ struct sample_reg { const char *name; uint64_t mask; }; -#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } -#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } + +#define SMPL_REG_MASK(b) (1ULL << (b)) +#define SMPL_REG(n, b) { .name = #n, .mask = SMPL_REG_MASK(b) } +#define SMPL_REG2_MASK(b) (3ULL << (b)) +#define SMPL_REG2(n, b) { .name = #n, .mask = SMPL_REG2_MASK(b) } #define SMPL_REG_END { .name = NULL } enum { @@ -31,22 +34,16 @@ extern const struct sample_reg sample_reg_masks[]; #define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP)) +const char *perf_reg_name(int id, const char *arch); int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); -static inline const char *perf_reg_name(int id) -{ - const char *reg_name = __perf_reg_name(id); - - return reg_name ?: "unknown"; -} - #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 #define 
DWARF_MINIMAL_REGS PERF_REGS_MASK -static inline const char *perf_reg_name(int id __maybe_unused) +static inline const char *perf_reg_name(int id __maybe_unused, const char *arch __maybe_unused) { return "unknown"; } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 7f782a31bda3..f3e5131f183c 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -1057,7 +1059,7 @@ static struct mmap *get_md(struct evlist *evlist, int cpu) for (i = 0; i < evlist->core.nr_mmaps; i++) { struct mmap *md = &evlist->mmap[i]; - if (md->core.cpu == cpu) + if (md->core.cpu.cpu == cpu) return md; } @@ -1443,7 +1445,7 @@ error: * Dummy, to avoid dragging all the test_attr infrastructure in the python * binding. 
*/ -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags) { } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index bff669b615ee..20461f174991 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call if (opts->group) evlist__set_leader(evlist); - if (evlist->core.cpus->map[0] < 0) + if (evlist->core.cpus->map[0].cpu < 0) opts->no_inherit = true; use_comm_exec = perf_can_comm_exec(); @@ -229,7 +229,8 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) { struct evlist *temp_evlist; struct evsel *evsel; - int err, fd, cpu; + int err, fd; + struct perf_cpu cpu = { .cpu = 0 }; bool ret = false; pid_t pid = -1; @@ -246,14 +247,16 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) if (!evlist || perf_cpu_map__empty(evlist->core.cpus)) { struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); - cpu = cpus ? 
cpus->map[0] : 0; + if (cpus) + cpu = cpus->map[0]; + perf_cpu_map__put(cpus); } else { cpu = evlist->core.cpus->map[0]; } while (1) { - fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, + fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu, -1, perf_event_open_cloexec_flag()); if (fd < 0) { if (pid == -1 && errno == EACCES) { diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 32a721b3e9a5..a5d945415bbc 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } else offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c0c010350bc2..f5ad0e62227a 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -36,6 +36,7 @@ #include "../debug.h" #include "../dso.h" #include "../callchain.h" +#include "../env.h" #include "../evsel.h" #include "../event.h" #include "../thread.h" @@ -687,7 +688,7 @@ static void set_sample_datasrc_in_dict(PyObject *dict, _PyUnicode_FromString(decode)); } -static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) +static void regs_map(struct regs_dump *regs, uint64_t mask, const char *arch, char *bf, int size) { unsigned int i = 0, r; int printed = 0; @@ -702,7 +703,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) printed += scnprintf(bf + printed, size - printed, "%5s:0x%" PRIx64 " ", - perf_reg_name(r), val); + perf_reg_name(r, arch), 
val); } } @@ -711,6 +712,7 @@ static void set_regs_in_dict(PyObject *dict, struct evsel *evsel) { struct perf_event_attr *attr = &evsel->core.attr; + const char *arch = perf_env__arch(evsel__env(evsel)); /* * Here value 28 is a constant size which can be used to print @@ -722,12 +724,12 @@ static void set_regs_in_dict(PyObject *dict, int size = __sw_hweight64(attr->sample_regs_intr) * 28; char bf[size]; - regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); + regs_map(&sample->intr_regs, attr->sample_regs_intr, arch, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "iregs", _PyUnicode_FromString(bf)); - regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf)); + regs_map(&sample->user_regs, attr->sample_regs_user, arch, bf, sizeof(bf)); pydict_set_item_string_decref(dict, "uregs", _PyUnicode_FromString(bf)); @@ -942,6 +944,8 @@ static void python_process_tracepoint(struct perf_sample *sample, offset = val; len = offset >> 16; offset &= 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; } if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { @@ -1553,7 +1557,7 @@ static void get_handler_name(char *str, size_t size, } static void -process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, +process_stat(struct evsel *counter, struct perf_cpu cpu, int thread, u64 tstamp, struct perf_counts_values *count) { PyObject *handler, *t; @@ -1573,7 +1577,7 @@ process_stat(struct evsel *counter, int cpu, int thread, u64 tstamp, return; } - PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu)); + PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu.cpu)); PyTuple_SetItem(t, n++, _PyLong_FromLong(thread)); tuple_set_u64(t, n++, tstamp); @@ -1597,7 +1601,7 @@ static void python_process_stat(struct perf_stat_config *config, int cpu, thread; if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, -1, -1, tstamp, + process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, 
tstamp, &counter->counts->aggr); return; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d8857d1b6d7c..f19348dddd55 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,7 @@ #include "map_symbol.h" #include "branch.h" #include "debug.h" +#include "env.h" #include "evlist.h" #include "evsel.h" #include "memswap.h" @@ -1168,7 +1169,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) } } -static void regs_dump__printf(u64 mask, u64 *regs) +static void regs_dump__printf(u64 mask, u64 *regs, const char *arch) { unsigned rid, i = 0; @@ -1176,7 +1177,7 @@ static void regs_dump__printf(u64 mask, u64 *regs) u64 val = regs[i++]; printf(".... %-5s 0x%016" PRIx64 "\n", - perf_reg_name(rid), val); + perf_reg_name(rid, arch), val); } } @@ -1194,7 +1195,7 @@ static inline const char *regs_dump_abi(struct regs_dump *d) return regs_abi[d->abi]; } -static void regs__printf(const char *type, struct regs_dump *regs) +static void regs__printf(const char *type, struct regs_dump *regs, const char *arch) { u64 mask = regs->mask; @@ -1203,23 +1204,23 @@ static void regs__printf(const char *type, struct regs_dump *regs) mask, regs_dump_abi(regs)); - regs_dump__printf(mask, regs->regs); + regs_dump__printf(mask, regs->regs, arch); } -static void regs_user__printf(struct perf_sample *sample) +static void regs_user__printf(struct perf_sample *sample, const char *arch) { struct regs_dump *user_regs = &sample->user_regs; if (user_regs->regs) - regs__printf("user", user_regs); + regs__printf("user", user_regs, arch); } -static void regs_intr__printf(struct perf_sample *sample) +static void regs_intr__printf(struct perf_sample *sample, const char *arch) { struct regs_dump *intr_regs = &sample->intr_regs; if (intr_regs->regs) - regs__printf("intr", intr_regs); + regs__printf("intr", intr_regs, arch); } static void stack_user__printf(struct stack_dump *dump) @@ -1304,7 +1305,7 @@ char *get_page_size_name(u64 
size, char *str) } static void dump_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample) + struct perf_sample *sample, const char *arch) { u64 sample_type; char str[PAGE_SIZE_NAME_LEN]; @@ -1325,10 +1326,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, branch_stack__printf(sample, evsel__has_branch_callstack(evsel)); if (sample_type & PERF_SAMPLE_REGS_USER) - regs_user__printf(sample); + regs_user__printf(sample, arch); if (sample_type & PERF_SAMPLE_REGS_INTR) - regs_intr__printf(sample); + regs_intr__printf(sample, arch); if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); @@ -1502,7 +1503,7 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unknown_id; return 0; } - dump_sample(evsel, event, sample); + dump_sample(evsel, event, sample, perf_env__arch(machine->env)); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; return 0; @@ -2537,15 +2538,15 @@ int perf_session__cpu_bitmap(struct perf_session *session, } for (i = 0; i < map->nr; i++) { - int cpu = map->map[i]; + struct perf_cpu cpu = map->map[i]; - if (cpu >= nr_cpus) { + if (cpu.cpu >= nr_cpus) { pr_err("Requested CPU %d too large. " - "Consider raising MAX_NR_CPUS\n", cpu); + "Consider raising MAX_NR_CPUS\n", cpu.cpu); goto out_delete_map; } - set_bit(cpu, cpu_bitmap); + set_bit(cpu.cpu, cpu_bitmap); } err = 0; @@ -2597,7 +2598,7 @@ int perf_event__process_id_index(struct perf_session *session, if (!sid) return -ENOENT; sid->idx = e->idx; - sid->cpu = e->cpu; + sid->cpu.cpu = e->cpu; sid->tid = e->tid; } return 0; diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 34f1b1b1176c..2b0a36ebf27a 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -5,6 +5,56 @@ #include "api/fs/fs.h" #include "smt.h" +/** + * hweight_str - Returns the number of bits set in str. Stops at first non-hex + * or ',' character. 
+ */ +static int hweight_str(char *str) +{ + int result = 0; + + while (*str) { + switch (*str++) { + case '0': + case ',': + break; + case '1': + case '2': + case '4': + case '8': + result++; + break; + case '3': + case '5': + case '6': + case '9': + case 'a': + case 'A': + case 'c': + case 'C': + result += 2; + break; + case '7': + case 'b': + case 'B': + case 'd': + case 'D': + case 'e': + case 'E': + result += 3; + break; + case 'f': + case 'F': + result += 4; + break; + default: + goto done; + } + } +done: + return result; +} + int smt_on(void) { static bool cached; @@ -15,9 +65,12 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) - goto done; + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) { + cached = true; + return cached_result; + } + cached_result = 0; ncpu = sysconf(_SC_NPROCESSORS_CONF); for (cpu = 0; cpu < ncpu; cpu++) { unsigned long long siblings; @@ -26,27 +79,21 @@ int smt_on(void) char fn[256]; snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/core_cpus", cpu); + "devices/system/cpu/cpu%d/topology/thread_siblings", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) { snprintf(fn, sizeof fn, - "devices/system/cpu/cpu%d/topology/thread_siblings", - cpu); + "devices/system/cpu/cpu%d/topology/core_cpus", cpu); if (sysfs__read_str(fn, &str, &strlen) < 0) continue; } /* Entry is hex, but does not have 0x, so need custom parser */ - siblings = strtoull(str, NULL, 16); + siblings = hweight_str(str); free(str); - if (hweight64(siblings) > 1) { + if (siblings > 1) { cached_result = 1; - cached = true; break; } } - if (!cached) { - cached_result = 0; -done: - cached = true; - } + cached = true; return cached_result; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a111065b484e..cfba8c337783 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -37,7 +37,7 @@ const char default_parent_pattern[] = 
"^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -46,8 +46,8 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; -const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; -const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; +static const char *const dynamic_headers[] = {"local_ins_lat", "ins_lat", "local_p_stage_cyc", "p_stage_cyc"}; +static const char *const arch_specific_sort_keys[] = {"local_p_stage_cyc", "p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -1392,22 +1392,37 @@ struct sort_entry sort_global_ins_lat = { }; static int64_t -sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +sort__p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) { return left->p_stage_cyc - right->p_stage_cyc; } +static int hist_entry__global_p_stage_cyc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, + he->p_stage_cyc * he->stat.nr_events); +} + + static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return repsep_snprintf(bf, size, "%-*u", width, he->p_stage_cyc); } -struct sort_entry sort_p_stage_cyc = { - .se_header = "Pipeline Stage Cycle", - .se_cmp = 
sort__global_p_stage_cyc_cmp, +struct sort_entry sort_local_p_stage_cyc = { + .se_header = "Local Pipeline Stage Cycle", + .se_cmp = sort__p_stage_cyc_cmp, .se_snprintf = hist_entry__p_stage_cyc_snprintf, - .se_width_idx = HISTC_P_STAGE_CYC, + .se_width_idx = HISTC_LOCAL_P_STAGE_CYC, +}; + +struct sort_entry sort_global_p_stage_cyc = { + .se_header = "Pipeline Stage Cycle", + .se_cmp = sort__p_stage_cyc_cmp, + .se_snprintf = hist_entry__global_p_stage_cyc_snprintf, + .se_width_idx = HISTC_GLOBAL_P_STAGE_CYC, }; struct sort_entry sort_mem_daddr_sym = { @@ -1858,7 +1873,8 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), - DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), + DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc), + DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc), }; #undef DIM @@ -2365,6 +2381,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; + if (field->flags & TEP_FIELD_IS_RELATIVE) + offset += field->offset + field->size; /* record max width for output */ if (size > hde->dynamic_len) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7b7145501933..f994261888e1 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -235,7 +235,8 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, - SORT_PIPELINE_STAGE_CYC, + SORT_LOCAL_PIPELINE_STAGE_CYC, + SORT_GLOBAL_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 588601000f3f..5db83e51ceef 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ 
-4,6 +4,7 @@ #include <linux/string.h> #include <linux/time64.h> #include <math.h> +#include <perf/cpumap.h> #include "color.h" #include "counts.h" #include "evlist.h" @@ -120,11 +121,10 @@ static void aggr_printout(struct perf_stat_config *config, id.die, config->csv_output ? 0 : -3, id.core, config->csv_sep); - } else if (id.core > -1) { + } else if (id.cpu.cpu > -1) { fprintf(config->output, "CPU%*d%s", config->csv_output ? 0 : -7, - evsel__cpus(evsel)->map[id.core], - config->csv_sep); + id.cpu.cpu, config->csv_sep); } break; case AGGR_THREAD: @@ -327,26 +327,24 @@ static void print_metric_header(struct perf_stat_config *config, fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_cpu(struct perf_stat_config *config, - struct evsel *evsel, struct aggr_cpu_id id) +static int first_shadow_cpu_map_idx(struct perf_stat_config *config, + struct evsel *evsel, const struct aggr_cpu_id *id) { - struct evlist *evlist = evsel->evlist; - int i; + struct perf_cpu_map *cpus = evsel__cpus(evsel); + struct perf_cpu cpu; + int idx; if (config->aggr_mode == AGGR_NONE) - return id.core; + return perf_cpu_map__idx(cpus, id->cpu); if (!config->aggr_get_id) return 0; - for (i = 0; i < evsel__nr_cpus(evsel); i++) { - int cpu2 = evsel__cpus(evsel)->map[i]; + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); - if (cpu_map__compare_aggr_cpu_id( - config->aggr_get_id(config, evlist->core.cpus, cpu2), - id)) { - return cpu2; - } + if (aggr_cpu_id__equal(&cpu_id, id)) + return idx; } return 0; } @@ -505,7 +503,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_cpu(config, counter, id), + first_shadow_cpu_map_idx(config, counter, &id), &out, &config->metric_events, st); if (!config->csv_output && !config->metric_only) { print_noise(config, counter, noise); @@ -516,23 +514,26 @@ static void 
printout(struct perf_stat_config *config, struct aggr_cpu_id id, int static void aggr_update_shadow(struct perf_stat_config *config, struct evlist *evlist) { - int cpu, s; + int idx, s; + struct perf_cpu cpu; struct aggr_cpu_id s2, id; u64 val; struct evsel *counter; + struct perf_cpu_map *cpus; for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; evlist__for_each_entry(evlist, counter) { + cpus = evsel__cpus(counter); val = 0; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - s2 = config->aggr_get_id(config, evlist->core.cpus, cpu); - if (!cpu_map__compare_aggr_cpu_id(s2, id)) + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + s2 = config->aggr_get_id(config, cpu); + if (!aggr_cpu_id__equal(&s2, &id)) continue; - val += perf_counts(counter->counts, cpu, 0)->val; + val += perf_counts(counter->counts, idx, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(config, counter, id), + first_shadow_cpu_map_idx(config, counter, &id), &rt_stat); } } @@ -627,25 +628,28 @@ struct aggr_data { u64 ena, run, val; struct aggr_cpu_id id; int nr; - int cpu; + int cpu_map_idx; }; static void aggr_cb(struct perf_stat_config *config, struct evsel *counter, void *data, bool first) { struct aggr_data *ad = data; - int cpu; + int idx; + struct perf_cpu cpu; + struct perf_cpu_map *cpus; struct aggr_cpu_id s2; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { + cpus = evsel__cpus(counter); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { struct perf_counts_values *counts; - s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu); - if (!cpu_map__compare_aggr_cpu_id(s2, ad->id)) + s2 = config->aggr_get_id(config, cpu); + if (!aggr_cpu_id__equal(&s2, &ad->id)) continue; if (first) ad->nr++; - counts = perf_counts(counter->counts, cpu, 0); + counts = perf_counts(counter->counts, idx, 0); /* * When any result is bad, make them all to give * consistent output in interval mode. 
@@ -665,7 +669,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first, int cpu) + bool *first, struct perf_cpu cpu) { struct aggr_data ad; FILE *output = config->output; @@ -695,10 +699,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu != -1) { - id = cpu_map__empty_aggr_cpu_id(); - id.core = cpu; - } + if (cpu.cpu != -1) + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); + printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) @@ -731,8 +734,8 @@ static void print_aggr(struct perf_stat_config *config, first = true; evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, -1); + prefix, metric_only, + &first, (struct perf_cpu){ .cpu = -1 }); } if (metric_only) fputc('\n', output); @@ -778,7 +781,7 @@ static struct perf_aggr_thread_value *sort_aggr_thread( continue; buf[i].counter = counter; - buf[i].id = cpu_map__empty_aggr_cpu_id(); + buf[i].id = aggr_cpu_id__empty(); buf[i].id.thread = thread; buf[i].uval = uval; buf[i].val = val; @@ -866,7 +869,7 @@ static void print_counter_aggr(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(config, cpu_map__empty_aggr_cpu_id(), 0, counter, uval, prefix, cd.avg_running, + printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); @@ -878,9 +881,9 @@ static void counter_cb(struct perf_stat_config *config __maybe_unused, { struct aggr_data *ad = data; - ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; + ad->val += 
perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; } /* @@ -893,11 +896,12 @@ static void print_counter(struct perf_stat_config *config, FILE *output = config->output; u64 ena, run, val; double uval; - int cpu; + int idx; + struct perf_cpu cpu; struct aggr_cpu_id id; - for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) { - struct aggr_data ad = { .cpu = cpu }; + perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { + struct aggr_data ad = { .cpu_map_idx = idx }; if (!collect_data(config, counter, counter_cb, &ad)) return; @@ -909,8 +913,7 @@ static void print_counter(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - id = cpu_map__empty_aggr_cpu_id(); - id.core = cpu; + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); printout(config, id, 0, counter, uval, prefix, run, ena, 1.0, &rt_stat); @@ -922,29 +925,32 @@ static void print_no_aggr_metric(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { - int cpu; - int nrcpus = 0; - struct evsel *counter; - u64 ena, run, val; - double uval; - struct aggr_cpu_id id; + int all_idx; + struct perf_cpu cpu; - nrcpus = evlist->core.cpus->nr; - for (cpu = 0; cpu < nrcpus; cpu++) { + perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.cpus) { + struct evsel *counter; bool first = true; if (prefix) fputs(prefix, config->output); evlist__for_each_entry(evlist, counter) { - id = cpu_map__empty_aggr_cpu_id(); - id.core = cpu; + u64 ena, run, val; + double uval; + struct aggr_cpu_id id; + int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); + + if (counter_idx < 0) + continue; + + id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { aggr_printout(config, counter, id, 0); first = false; } - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = 
perf_counts(counter->counts, cpu, 0)->run; + val = perf_counts(counter->counts, counter_idx, 0)->val; + ena = perf_counts(counter->counts, counter_idx, 0)->ena; + run = perf_counts(counter->counts, counter_idx, 0)->run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, @@ -1208,19 +1214,23 @@ static void print_percore_thread(struct perf_stat_config *config, { int s; struct aggr_cpu_id s2, id; + struct perf_cpu_map *cpus; bool first = true; + int idx; + struct perf_cpu cpu; - for (int i = 0; i < evsel__nr_cpus(counter); i++) { - s2 = config->aggr_get_id(config, evsel__cpus(counter), i); + cpus = evsel__cpus(counter); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + s2 = config->aggr_get_id(config, cpu); for (s = 0; s < config->aggr_map->nr; s++) { id = config->aggr_map->map[s]; - if (cpu_map__compare_aggr_cpu_id(s2, id)) + if (aggr_cpu_id__equal(&s2, &id)) break; } print_counter_aggrdata(config, counter, s, prefix, false, - &first, i); + &first, cpu); } } @@ -1243,8 +1253,8 @@ static void print_percore(struct perf_stat_config *config, fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, -1); + prefix, metric_only, + &first, (struct perf_cpu){ .cpu = -1 }); } if (metric_only) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 5c7308efa768..10af7804e482 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -32,7 +32,7 @@ struct saved_value { struct evsel *evsel; enum stat_type type; int ctx; - int cpu; + int cpu_map_idx; struct cgroup *cgrp; struct runtime_stat *stat; struct stats stats; @@ -47,8 +47,8 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->cpu != b->cpu) - return a->cpu - b->cpu; + if (a->cpu_map_idx != b->cpu_map_idx) + return a->cpu_map_idx - b->cpu_map_idx; /* * Previously the rbtree was used to link generic metrics. 
@@ -105,7 +105,7 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, } static struct saved_value *saved_value_lookup(struct evsel *evsel, - int cpu, + int cpu_map_idx, bool create, enum stat_type type, int ctx, @@ -115,7 +115,7 @@ static struct saved_value *saved_value_lookup(struct evsel *evsel, struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { - .cpu = cpu, + .cpu_map_idx = cpu_map_idx, .evsel = evsel, .type = type, .ctx = ctx, @@ -213,10 +213,10 @@ struct runtime_stat_data { static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, - int cpu, u64 count, + int cpu_map_idx, u64 count, struct runtime_stat_data *rsd) { - struct saved_value *v = saved_value_lookup(NULL, cpu, true, type, + struct saved_value *v = saved_value_lookup(NULL, cpu_map_idx, true, type, rsd->ctx, st, rsd->cgrp); if (v) @@ -229,7 +229,7 @@ static void update_runtime_stat(struct runtime_stat *st, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu, struct runtime_stat *st) + int cpu_map_idx, struct runtime_stat *st) { u64 count_ns = count; struct saved_value *v; @@ -241,88 +241,88 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, count *= counter->scale; if (evsel__is_clock(counter)) - update_runtime_stat(st, STAT_NSECS, cpu, count_ns, &rsd); + update_runtime_stat(st, STAT_NSECS, cpu_map_idx, count_ns, &rsd); else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_runtime_stat(st, STAT_CYCLES, cpu, count, &rsd); + update_runtime_stat(st, STAT_CYCLES, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu, count, &rsd); + update_runtime_stat(st, STAT_CYCLES_IN_TX, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_runtime_stat(st, STAT_TRANSACTION, cpu, count, &rsd); + update_runtime_stat(st, STAT_TRANSACTION, cpu_map_idx, count, 
&rsd); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_runtime_stat(st, STAT_ELISION, cpu, count, &rsd); + update_runtime_stat(st, STAT_ELISION, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) update_runtime_stat(st, STAT_TOPDOWN_RETIRING, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_HEAVY_OPS)) update_runtime_stat(st, STAT_TOPDOWN_HEAVY_OPS, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_BR_MISPREDICT)) update_runtime_stat(st, STAT_TOPDOWN_BR_MISPREDICT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, 
TOPDOWN_FETCH_LAT)) update_runtime_stat(st, STAT_TOPDOWN_FETCH_LAT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, TOPDOWN_MEM_BOUND)) update_runtime_stat(st, STAT_TOPDOWN_MEM_BOUND, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, - cpu, count, &rsd); + cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_runtime_stat(st, STAT_BRANCHES, cpu, count, &rsd); + update_runtime_stat(st, STAT_BRANCHES, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_runtime_stat(st, STAT_CACHEREFS, cpu, count, &rsd); + update_runtime_stat(st, STAT_CACHEREFS, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_runtime_stat(st, STAT_L1_DCACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_L1_DCACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_runtime_stat(st, STAT_L1_ICACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_L1_ICACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_runtime_stat(st, STAT_LL_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_LL_CACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_runtime_stat(st, STAT_DTLB_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_DTLB_CACHE, cpu_map_idx, count, &rsd); else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_runtime_stat(st, STAT_ITLB_CACHE, cpu, count, &rsd); + update_runtime_stat(st, STAT_ITLB_CACHE, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, SMI_NUM)) - 
update_runtime_stat(st, STAT_SMI_NUM, cpu, count, &rsd); + update_runtime_stat(st, STAT_SMI_NUM, cpu_map_idx, count, &rsd); else if (perf_stat_evsel__is(counter, APERF)) - update_runtime_stat(st, STAT_APERF, cpu, count, &rsd); + update_runtime_stat(st, STAT_APERF, cpu_map_idx, count, &rsd); if (counter->collect_stat) { - v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st, + v = saved_value_lookup(counter, cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); update_stats(&v->stats, count); if (counter->metric_leader) v->metric_total += count; } else if (counter->metric_leader) { v = saved_value_lookup(counter->metric_leader, - cpu, true, STAT_NONE, 0, st, rsd.cgrp); + cpu_map_idx, true, STAT_NONE, 0, st, rsd.cgrp); v->metric_total += count; v->metric_other++; } @@ -464,12 +464,12 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) } static double runtime_stat_avg(struct runtime_stat *st, - enum stat_type type, int cpu, + enum stat_type type, int cpu_map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -477,12 +477,12 @@ static double runtime_stat_avg(struct runtime_stat *st, } static double runtime_stat_n(struct runtime_stat *st, - enum stat_type type, int cpu, + enum stat_type type, int cpu_map_idx, struct runtime_stat_data *rsd) { struct saved_value *v; - v = saved_value_lookup(NULL, cpu, false, type, rsd->ctx, st, rsd->cgrp); + v = saved_value_lookup(NULL, cpu_map_idx, false, type, rsd->ctx, st, rsd->cgrp); if (!v) return 0.0; @@ -490,7 +490,7 @@ static double runtime_stat_n(struct runtime_stat *st, } static void print_stalled_cycles_frontend(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -498,7 +498,7 @@ static void 
print_stalled_cycles_frontend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -513,7 +513,7 @@ static void print_stalled_cycles_frontend(struct perf_stat_config *config, } static void print_stalled_cycles_backend(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -521,7 +521,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -532,7 +532,7 @@ static void print_stalled_cycles_backend(struct perf_stat_config *config, } static void print_branch_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -540,7 +540,7 @@ static void print_branch_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_BRANCHES, cpu, rsd); + total = runtime_stat_avg(st, STAT_BRANCHES, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -551,7 +551,7 @@ static void print_branch_misses(struct perf_stat_config *config, } static void print_l1_dcache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -559,7 +559,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_DCACHE, cpu, rsd); + total = runtime_stat_avg(st, 
STAT_L1_DCACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -570,7 +570,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config, } static void print_l1_icache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -578,7 +578,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_L1_ICACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -588,7 +588,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config, } static void print_dtlb_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -596,7 +596,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -606,7 +606,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config, } static void print_itlb_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -614,7 +614,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -624,7 +624,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config, } static void 
print_ll_cache_misses(struct perf_stat_config *config, - int cpu, double avg, + int cpu_map_idx, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -632,7 +632,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config, double total, ratio = 0.0; const char *color; - total = runtime_stat_avg(st, STAT_LL_CACHE, cpu, rsd); + total = runtime_stat_avg(st, STAT_LL_CACHE, cpu_map_idx, rsd); if (total) ratio = avg / total * 100.0; @@ -690,61 +690,61 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int cpu, struct runtime_stat *st, +static double td_total_slots(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu, rsd); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, cpu_map_idx, rsd); } -static double td_bad_spec(int cpu, struct runtime_stat *st, +static double td_bad_spec(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double bad_spec = 0; double total_slots; double total; - total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu, rsd) - - runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu, rsd) + - runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu, rsd); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, cpu_map_idx, rsd) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, cpu_map_idx, rsd) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, cpu_map_idx, rsd); - total_slots = td_total_slots(cpu, st, rsd); + total_slots = td_total_slots(cpu_map_idx, st, rsd); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int cpu, struct runtime_stat *st, +static double td_retiring(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double retiring = 0; - double total_slots = td_total_slots(cpu, st, rsd); + double total_slots = td_total_slots(cpu_map_idx, st, rsd); 
double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, - cpu, rsd); + cpu_map_idx, rsd); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int cpu, struct runtime_stat *st, +static double td_fe_bound(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { double fe_bound = 0; - double total_slots = td_total_slots(cpu, st, rsd); + double total_slots = td_total_slots(cpu_map_idx, st, rsd); double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, - cpu, rsd); + cpu_map_idx, rsd); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int cpu, struct runtime_stat *st, +static double td_be_bound(int cpu_map_idx, struct runtime_stat *st, struct runtime_stat_data *rsd) { - double sum = (td_fe_bound(cpu, st, rsd) + - td_bad_spec(cpu, st, rsd) + - td_retiring(cpu, st, rsd)); + double sum = (td_fe_bound(cpu_map_idx, st, rsd) + + td_bad_spec(cpu_map_idx, st, rsd) + + td_retiring(cpu_map_idx, st, rsd)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); @@ -755,15 +755,15 @@ static double td_be_bound(int cpu, struct runtime_stat *st, * the ratios we need to recreate the sum. 
*/ -static double td_metric_ratio(int cpu, enum stat_type type, +static double td_metric_ratio(int cpu_map_idx, enum stat_type type, struct runtime_stat *stat, struct runtime_stat_data *rsd) { - double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) + - runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd); - double d = runtime_stat_avg(stat, type, cpu, rsd); + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd); + double d = runtime_stat_avg(stat, type, cpu_map_idx, rsd); if (sum) return d / sum; @@ -775,23 +775,23 @@ static double td_metric_ratio(int cpu, enum stat_type type, * We allow two missing. */ -static bool full_td(int cpu, struct runtime_stat *stat, +static bool full_td(int cpu_map_idx, struct runtime_stat *stat, struct runtime_stat_data *rsd) { int c = 0; - if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, cpu_map_idx, rsd) > 0) c++; - if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu, rsd) > 0) + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, cpu_map_idx, rsd) > 0) c++; return c >= 2; } -static void print_smi_cost(struct perf_stat_config *config, int cpu, +static void print_smi_cost(struct perf_stat_config *config, int cpu_map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st, struct runtime_stat_data *rsd) @@ -799,9 
+799,9 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu, double smi_num, aperf, cycles, cost = 0.0; const char *color = NULL; - smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu, rsd); - aperf = runtime_stat_avg(st, STAT_APERF, cpu, rsd); - cycles = runtime_stat_avg(st, STAT_CYCLES, cpu, rsd); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, cpu_map_idx, rsd); + aperf = runtime_stat_avg(st, STAT_APERF, cpu_map_idx, rsd); + cycles = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, rsd); if ((cycles == 0) || (aperf == 0)) return; @@ -818,7 +818,7 @@ static void print_smi_cost(struct perf_stat_config *config, int cpu, static int prepare_metric(struct evsel **metric_events, struct metric_ref *metric_refs, struct expr_parse_ctx *pctx, - int cpu, + int cpu_map_idx, struct runtime_stat *st) { double scale; @@ -836,7 +836,7 @@ static int prepare_metric(struct evsel **metric_events, scale = 1e-9; source_count = 1; } else { - v = saved_value_lookup(metric_events[i], cpu, false, + v = saved_value_lookup(metric_events[i], cpu_map_idx, false, STAT_NONE, 0, st, metric_events[i]->cgrp); if (!v) @@ -874,7 +874,7 @@ static void generic_metric(struct perf_stat_config *config, const char *metric_name, const char *metric_unit, int runtime, - int cpu, + int cpu_map_idx, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -889,7 +889,7 @@ static void generic_metric(struct perf_stat_config *config, return; pctx->runtime = runtime; - i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); + i = prepare_metric(metric_events, metric_refs, pctx, cpu_map_idx, st); if (i < 0) { expr__ctx_free(pctx); return; @@ -934,7 +934,7 @@ static void generic_metric(struct perf_stat_config *config, expr__ctx_free(pctx); } -double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) +double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st) { struct expr_parse_ctx *pctx; double ratio = 0.0; @@ -943,7 +943,7 
@@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu_map_idx, st) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -956,7 +956,7 @@ out: void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int cpu, + double avg, int cpu_map_idx, struct perf_stat_output_ctx *out, struct rblist *metric_events, struct runtime_stat *st) @@ -975,7 +975,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (config->iostat_run) { iostat_print_metric(config, evsel, out); } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); if (total) { ratio = avg / total; @@ -985,11 +985,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } - total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu, &rsd); + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, cpu_map_idx, &rsd); total = max(total, runtime_stat_avg(st, STAT_STALLED_CYCLES_BACK, - cpu, &rsd)); + cpu_map_idx, &rsd)); if (total && avg) { out->new_line(config, ctxp); @@ -999,8 +999,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ratio); } } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_stat_n(st, STAT_BRANCHES, cpu, &rsd) != 0) - print_branch_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_BRANCHES, cpu_map_idx, &rsd) != 0) + print_branch_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -1009,8 +1009,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, 
((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_DCACHE, cpu, &rsd) != 0) - print_l1_dcache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_DCACHE, cpu_map_idx, &rsd) != 0) + print_l1_dcache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0); } else if ( @@ -1019,8 +1019,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_L1_ICACHE, cpu, &rsd) != 0) - print_l1_icache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_L1_ICACHE, cpu_map_idx, &rsd) != 0) + print_l1_icache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0); } else if ( @@ -1029,8 +1029,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu, &rsd) != 0) - print_dtlb_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_DTLB_CACHE, cpu_map_idx, &rsd) != 0) + print_dtlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0); } else if ( @@ -1039,8 +1039,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu, &rsd) != 0) - print_itlb_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_ITLB_CACHE, cpu_map_idx, &rsd) != 0) + print_itlb_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0); } else if ( @@ -1049,27 +1049,27 @@ 
void perf_stat__print_shadow_stats(struct perf_stat_config *config, ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_stat_n(st, STAT_LL_CACHE, cpu, &rsd) != 0) - print_ll_cache_misses(config, cpu, avg, out, st, &rsd); + if (runtime_stat_n(st, STAT_LL_CACHE, cpu_map_idx, &rsd) != 0) + print_ll_cache_misses(config, cpu_map_idx, avg, out, st, &rsd); else print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0); } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = runtime_stat_avg(st, STAT_CACHEREFS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CACHEREFS, cpu_map_idx, &rsd); if (total) ratio = avg * 100 / total; - if (runtime_stat_n(st, STAT_CACHEREFS, cpu, &rsd) != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, cpu_map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(config, cpu, avg, out, st, &rsd); + print_stalled_cycles_frontend(config, cpu_map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(config, cpu, avg, out, st, &rsd); + print_stalled_cycles_backend(config, cpu_map_idx, avg, out, st, &rsd); } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); if (total) { ratio = avg / total; @@ -1078,7 +1078,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); if (total) print_metric(config, ctxp, NULL, @@ -1088,8 +1088,8 @@ void 
perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); - total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES, cpu_map_idx, &rsd); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (total2 < avg) total2 = avg; @@ -1099,19 +1099,19 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (avg) ratio = total / avg; - if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu, &rsd) != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd) != 0) print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu, &rsd); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, cpu_map_idx, &rsd); if (avg) ratio = total / avg; @@ -1124,28 +1124,28 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, else print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(cpu, st, &rsd); + double fe_bound = td_fe_bound(cpu_map_idx, st, &rsd); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(cpu, st, &rsd); + double retiring = td_retiring(cpu_map_idx, st, &rsd); if (retiring > 0.7) color = 
PERF_COLOR_GREEN; print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(cpu, st, &rsd); + double bad_spec = td_bad_spec(cpu_map_idx, st, &rsd); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(cpu, st, &rsd); + double be_bound = td_be_bound(cpu_map_idx, st, &rsd); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -1158,14 +1158,14 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(cpu, st, &rsd) > 0) + if (td_total_slots(cpu_map_idx, st, &rsd) > 0) print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && - full_td(cpu, st, &rsd)) { - double retiring = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double retiring = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); if (retiring > 0.7) @@ -1173,8 +1173,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && - full_td(cpu, st, &rsd)) { - double fe_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double fe_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); if (fe_bound > 0.2) @@ -1182,8 +1182,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && - full_td(cpu, st, &rsd)) { - double be_bound = td_metric_ratio(cpu, + 
full_td(cpu_map_idx, st, &rsd)) { + double be_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); if (be_bound > 0.2) @@ -1191,8 +1191,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "backend bound", be_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && - full_td(cpu, st, &rsd)) { - double bad_spec = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd)) { + double bad_spec = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); if (bad_spec > 0.1) @@ -1200,11 +1200,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double retiring = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double retiring = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_RETIRING, st, &rsd); - double heavy_ops = td_metric_ratio(cpu, + double heavy_ops = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_HEAVY_OPS, st, &rsd); double light_ops = retiring - heavy_ops; @@ -1220,11 +1220,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "light operations", light_ops * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double bad_spec = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double bad_spec = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BAD_SPEC, st, &rsd); - double br_mis = td_metric_ratio(cpu, + double br_mis = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BR_MISPREDICT, st, &rsd); double m_clears = bad_spec - br_mis; @@ -1240,11 +1240,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, 
color, "%8.1f%%", "machine clears", m_clears * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double fe_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double fe_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FE_BOUND, st, &rsd); - double fetch_lat = td_metric_ratio(cpu, + double fetch_lat = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_FETCH_LAT, st, &rsd); double fetch_bw = fe_bound - fetch_lat; @@ -1260,11 +1260,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth", fetch_bw * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) && - full_td(cpu, st, &rsd) && (config->topdown_level > 1)) { - double be_bound = td_metric_ratio(cpu, + full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) { + double be_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_BE_BOUND, st, &rsd); - double mem_bound = td_metric_ratio(cpu, + double mem_bound = td_metric_ratio(cpu_map_idx, STAT_TOPDOWN_MEM_BOUND, st, &rsd); double core_bound = be_bound - mem_bound; @@ -1281,12 +1281,12 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, core_bound * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, - evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); - } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { + evsel->name, evsel->metric_name, NULL, 1, cpu_map_idx, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, cpu_map_idx, &rsd) != 0) { char unit = ' '; char unit_buf[10] = "/sec"; - total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); + total = runtime_stat_avg(st, STAT_NSECS, cpu_map_idx, &rsd); if (total) ratio = convert_unit_double(1000000000.0 * avg / total, &unit); @@ -1294,7 +1294,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, 
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(config, cpu, out, st, &rsd); + print_smi_cost(config, cpu_map_idx, out, st, &rsd); } else { num = 0; } @@ -1307,7 +1307,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, out->new_line(config, ctxp); generic_metric(config, mexp->metric_expr, mexp->metric_events, mexp->metric_refs, evsel->name, mexp->metric_name, - mexp->metric_unit, mexp->runtime, cpu, out, st); + mexp->metric_unit, mexp->runtime, cpu_map_idx, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 09ea334586f2..ee6f03481215 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -152,11 +152,13 @@ static void evsel__free_stat_priv(struct evsel *evsel) zfree(&evsel->stats); } -static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads) +static int evsel__alloc_prev_raw_counts(struct evsel *evsel) { + int cpu_map_nr = evsel__nr_cpus(evsel); + int nthreads = perf_thread_map__nr(evsel->core.threads); struct perf_counts *counts; - counts = perf_counts__new(ncpus, nthreads); + counts = perf_counts__new(cpu_map_nr, nthreads); if (counts) evsel->prev_raw_counts = counts; @@ -177,12 +179,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) { - int ncpus = evsel__nr_cpus(evsel); - int nthreads = perf_thread_map__nr(evsel->core.threads); - if (evsel__alloc_stat_priv(evsel) < 0 || - evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || - (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + evsel__alloc_counts(evsel) < 0 || + (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0)) return -ENOMEM; return 0; @@ -293,11 +292,12 @@ static bool pkg_id_equal(const void *__key1, const void *__key2, return *key1 == *key2; } -static int 
check_per_pkg(struct evsel *counter, - struct perf_counts_values *vals, int cpu, bool *skip) +static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, + int cpu_map_idx, bool *skip) { struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); + struct perf_cpu cpu = perf_cpu_map__cpu(cpus, cpu_map_idx); int s, d, ret = 0; uint64_t *key; @@ -328,7 +328,7 @@ static int check_per_pkg(struct evsel *counter, if (!(vals->run && vals->ena)) return 0; - s = cpu_map__get_socket(cpus, cpu, NULL).socket; + s = cpu__get_socket_id(cpu); if (s < 0) return -1; @@ -336,7 +336,7 @@ static int check_per_pkg(struct evsel *counter, * On multi-die system, die_id > 0. On no-die system, die_id = 0. * We use hashmap(socket, die) to check the used socket+die pair. */ - d = cpu_map__get_die(cpus, cpu, NULL).die; + d = cpu__get_die_id(cpu); if (d < 0) return -1; @@ -345,9 +345,10 @@ static int check_per_pkg(struct evsel *counter, return -ENOMEM; *key = (uint64_t)d << 32 | s; - if (hashmap__find(mask, (void *)key, NULL)) + if (hashmap__find(mask, (void *)key, NULL)) { *skip = true; - else + free(key); + } else ret = hashmap__add(mask, (void *)key, (void *)1); return ret; @@ -355,14 +356,14 @@ static int check_per_pkg(struct evsel *counter, static int process_counter_values(struct perf_stat_config *config, struct evsel *evsel, - int cpu, int thread, + int cpu_map_idx, int thread, struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, count, cpu, &skip)) { + if (check_per_pkg(evsel, count, cpu_map_idx, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } @@ -378,11 +379,11 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, case AGGR_NODE: case AGGR_NONE: if (!evsel->snapshot) - evsel__compute_deltas(evsel, cpu, thread, count); + evsel__compute_deltas(evsel, cpu_map_idx, 
thread, count); perf_counts_values__scale(count, config->scale, NULL); if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { perf_stat__update_shadow_stats(evsel, count->val, - cpu, &rt_stat); + cpu_map_idx, &rt_stat); } if (config->aggr_mode == AGGR_THREAD) { @@ -411,15 +412,15 @@ static int process_counter_maps(struct perf_stat_config *config, { int nthreads = perf_thread_map__nr(counter->core.threads); int ncpus = evsel__nr_cpus(counter); - int cpu, thread; + int idx, thread; if (counter->core.system_wide) nthreads = 1; for (thread = 0; thread < nthreads; thread++) { - for (cpu = 0; cpu < ncpus; cpu++) { - if (process_counter_values(config, counter, cpu, thread, - perf_counts(counter->counts, cpu, thread))) + for (idx = 0; idx < ncpus; idx++) { + if (process_counter_values(config, counter, idx, thread, + perf_counts(counter->counts, idx, thread))) return -1; } } @@ -531,7 +532,7 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, - int cpu) + int cpu_map_idx) { struct perf_event_attr *attr = &evsel->core.attr; struct evsel *leader = evsel__leader(evsel); @@ -585,7 +586,7 @@ int create_perf_stat_counter(struct evsel *evsel, } if (target__has_cpu(target) && !target__has_per_thread(target)) - return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu); + return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu_map_idx); return evsel__open_per_thread(evsel, evsel->core.threads); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 32c8527de347..335d19cc3063 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -108,8 +108,7 @@ struct runtime_stat { struct rblist value_list; }; -typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, - struct perf_cpu_map *m, int cpu); +typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct 
perf_stat_config { enum aggr_mode aggr_mode; @@ -209,7 +208,7 @@ void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, - int cpu, struct runtime_stat *st); + int cpu_map_idx, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; @@ -249,10 +248,10 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); int create_perf_stat_counter(struct evsel *evsel, struct perf_stat_config *config, struct target *target, - int cpu); + int cpu_map_idx); void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv); struct metric_expr; -double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st); +double test_generic_metric(struct metric_expr *mexp, int cpu_map_idx, struct runtime_stat *st); #endif diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 96f941e01681..4c9f211249db 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -728,7 +728,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) int i; int ret = 0; struct perf_cpu_map *m; - int c; + struct perf_cpu c; m = perf_cpu_map__new(s); if (!m) @@ -736,12 +736,12 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) for (i = 0; i < m->nr; i++) { c = m->map[i]; - if (c >= nr_cpus) { + if (c.cpu >= nr_cpus) { ret = -1; break; } - set_bit(c, cpumask_bits(b)); + set_bit(c.cpu, cpumask_bits(b)); } perf_cpu_map__put(m); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 198982109f0f..c9ba8050cc2b 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1191,7 +1191,7 @@ static void synthesize_cpus(struct cpu_map_entries *cpus, cpus->nr = 
map->nr; for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i]; + cpus->cpu[i] = map->map[i].cpu; } static void synthesize_mask(struct perf_record_record_cpu_map *mask, @@ -1203,7 +1203,7 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask, mask->long_size = sizeof(long); for (i = 0; i < map->nr; i++) - set_bit(map->map[i], mask->mask); + set_bit(map->map[i].cpu, mask->mask); } static size_t cpus_size(struct perf_cpu_map *map) @@ -1219,7 +1219,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) for (i = 0; i < map->nr; i++) { /* bit position of the cpu is + 1 */ - int bit = map->map[i] + 1; + int bit = map->map[i].cpu + 1; if (bit > *max) *max = bit; @@ -1354,7 +1354,7 @@ int perf_event__synthesize_stat_config(struct perf_tool *tool, } int perf_event__synthesize_stat(struct perf_tool *tool, - u32 cpu, u32 thread, u64 id, + struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine) @@ -1366,7 +1366,7 @@ int perf_event__synthesize_stat(struct perf_tool *tool, event.header.misc = 0; event.id = id; - event.cpu = cpu; + event.cpu = cpu.cpu; event.thread = thread; event.val = count->val; event.ena = count->ena; @@ -1763,7 +1763,7 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ } e->idx = sid->idx; - e->cpu = sid->cpu; + e->cpu = sid->cpu.cpu; e->tid = sid->tid; } } diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index c931433bacbf..78a0450db164 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -6,6 +6,7 @@ #include <sys/types.h> // pid_t #include <linux/compiler.h> #include <linux/types.h> +#include <perf/cpumap.h> struct auxtrace_record; struct dso; @@ -63,7 +64,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, 
perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs); int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); +int perf_event__synthesize_stat(struct perf_tool *tool, struct perf_cpu cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data); int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index df3c4671be72..fb4f6616b5fa 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -416,3 +416,18 @@ char *perf_exe(char *buf, int len) } return strcpy(buf, "perf"); } + +void perf_debuginfod_setup(struct perf_debuginfod *di) +{ + /* + * By default '!di->set' we clear DEBUGINFOD_URLS, so debuginfod + * processing is not triggered, otherwise we set it to 'di->urls' + * value. If 'di->urls' is "system" we keep DEBUGINFOD_URLS value. 
+ */ + if (!di->set) + setenv("DEBUGINFOD_URLS", "", 1); + else if (di->urls && strcmp(di->urls, "system")) + setenv("DEBUGINFOD_URLS", di->urls, 1); + + pr_debug("DEBUGINFOD_URLS=%s\n", getenv("DEBUGINFOD_URLS")); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9f0d36ba77f2..7b625cbd2dd8 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -11,6 +11,9 @@ #include <stddef.h> #include <linux/compiler.h> #include <sys/types.h> +#ifndef __cplusplus +#include <internal/cpumap.h> +#endif /* General helper functions */ void usage(const char *err) __noreturn; @@ -66,6 +69,12 @@ extern bool test_attr__enabled; void test_attr__ready(void); void test_attr__init(void); struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, +void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, int fd, int group_fd, unsigned long flags); + +struct perf_debuginfod { + const char *urls; + bool set; +}; +void perf_debuginfod_setup(struct perf_debuginfod *di); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore index 0b319fc8bb17..eada0297ef88 100644 --- a/tools/power/acpi/.gitignore +++ b/tools/power/acpi/.gitignore @@ -2,4 +2,5 @@ /acpidbg /acpidump /ec +/pfrut /include/ diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile index a249c50ebf55..5ff1d9c864d0 100644 --- a/tools/power/acpi/Makefile +++ b/tools/power/acpi/Makefile @@ -9,18 +9,18 @@ include ../../scripts/Makefile.include .NOTPARALLEL: -all: acpidbg acpidump ec -clean: acpidbg_clean acpidump_clean ec_clean -install: acpidbg_install acpidump_install ec_install -uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall +all: acpidbg acpidump ec pfrut +clean: acpidbg_clean acpidump_clean ec_clean pfrut_clean +install: acpidbg_install acpidump_install ec_install pfrut_install +uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall -acpidbg acpidump 
ec: FORCE +acpidbg acpidump ec pfrut: FORCE $(call descend,tools/$@,all) -acpidbg_clean acpidump_clean ec_clean: +acpidbg_clean acpidump_clean ec_clean pfrut_clean: $(call descend,tools/$(@:_clean=),clean) -acpidbg_install acpidump_install ec_install: +acpidbg_install acpidump_install ec_install pfrut_install: $(call descend,tools/$(@:_install=),install) -acpidbg_uninstall acpidump_uninstall ec_uninstall: +acpidbg_uninstall acpidump_uninstall ec_uninstall pfrut_uninstall: $(call descend,tools/$(@:_uninstall=),uninstall) .PHONY: FORCE diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index 1d7616f5d0ae..b71aada77688 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -9,7 +9,7 @@ objdir := $(OUTPUT)tools/$(TOOL)/ toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) $(OUTPUT)$(TOOL): $(toolobjs) FORCE $(ECHO) " LD " $(subst $(OUTPUT),,$@) - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ + $(QUIET) $(LD) $(CFLAGS) $(toolobjs) $(LDFLAGS) -L$(OUTPUT) -o $@ $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ diff --git a/tools/power/acpi/man/pfrut.8 b/tools/power/acpi/man/pfrut.8 new file mode 100644 index 000000000000..3db574770e8d --- /dev/null +++ b/tools/power/acpi/man/pfrut.8 @@ -0,0 +1,137 @@ +.TH "PFRUT" "8" "October 2021" "pfrut 1.0" "" +.hy +.SH Name +.PP +pfrut \- Platform Firmware Runtime Update and Telemetry tool +.SH SYNOPSIS +.PP +\f[B]pfrut\f[R] [\f[I]Options\f[R]] +.SH DESCRIPTION +.PP +The PFRUT(Platform Firmware Runtime Update and Telemetry) kernel interface is designed +to +.PD 0 +.P +.PD +interact with the platform firmware interface defined in the +.PD 0 +.P +.PD +Management Mode Firmware Runtime +Update (https://uefi.org/sites/default/files/resources/Intel_MM_OS_Interface_Spec_Rev100.pdf) +.PD 0 +.P +.PD +\f[B]pfrut\f[R] is the tool to interact with the kernel interface. 
+.PD 0 +.P +.PD +.SH OPTIONS +.TP +.B \f[B]\-h\f[R], \f[B]\-\-help\f[R] +Display help information. +.TP +.B \f[B]\-l\f[R], \f[B]\-\-load\f[R] +Load the capsule file into the system. +To be more specific, the capsule file will be copied to the +communication buffer. +.TP +.B \f[B]\-s\f[R], \f[B]\-\-stage\f[R] +Stage the capsule image from communication buffer into Management Mode +and perform authentication. +.TP +.B \f[B]\-a\f[R], \f[B]\-\-activate\f[R] +Activate a previously staged capsule image. +.TP +.B \f[B]\-u\f[R], \f[B]\-\-update\f[R] +Perform both stage and activation actions. +.TP +.B \f[B]\-q\f[R], \f[B]\-\-query\f[R] +Query the update capability. +.TP +.B \f[B]\-d\f[R], \f[B]\-\-setrev\f[R] +Set the revision ID of code injection/driver update. +.TP +.B \f[B]\-D\f[R], \f[B]\-\-setrevlog\f[R] +Set the revision ID of telemetry. +.TP +.B \f[B]\-G\f[R], \f[B]\-\-getloginfo\f[R] +Get telemetry log information and print it out. +.TP +.B \f[B]\-T\f[R], \f[B]\-\-type\f[R] +Set the telemetry log data type. +.TP +.B \f[B]\-L\f[R], \f[B]\-\-level\f[R] +Set the telemetry log level. +.TP +.B \f[B]\-R\f[R], \f[B]\-\-read\f[R] +Read all the telemetry data and print it out. +.SH EXAMPLES +.PP +\f[B]pfrut \-G\f[R] +.PP +log_level:4 +.PD 0 +.P +.PD +log_type:0 +.PD 0 +.P +.PD +log_revid:2 +.PD 0 +.P +.PD +max_data_size:65536 +.PD 0 +.P +.PD +chunk1_size:0 +.PD 0 +.P +.PD +chunk2_size:1401 +.PD 0 +.P +.PD +rollover_cnt:0 +.PD 0 +.P +.PD +reset_cnt:4 +.PP +\f[B]pfrut \-q\f[R] +.PP +code injection image type:794bf8b2\-6e7b\-454e\-885f\-3fb9bb185402 +.PD 0 +.P +.PD +fw_version:0 +.PD 0 +.P +.PD +code_rt_version:1 +.PD 0 +.P +.PD +driver update image type:0e5f0b14\-f849\-7945\-ad81\-bc7b6d2bb245 +.PD 0 +.P +.PD +drv_rt_version:0 +.PD 0 +.P +.PD +drv_svn:0 +.PD 0 +.P +.PD +platform id:39214663\-b1a8\-4eaa\-9024\-f2bb53ea4723 +.PD 0 +.P +.PD +oem id:a36db54f\-ea2a\-e14e\-b7c4\-b5780e51ba3d +.PP +\f[B]pfrut \-l yours.cap \-u \-T 1 \-L 4\f[R] +.SH AUTHORS +Chen Yu. 
diff --git a/tools/power/acpi/tools/pfrut/Makefile b/tools/power/acpi/tools/pfrut/Makefile new file mode 100644 index 000000000000..61c1a96fd433 --- /dev/null +++ b/tools/power/acpi/tools/pfrut/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0+ + +include ../../Makefile.config + +TOOL = pfrut +EXTRA_INSTALL = install-man +EXTRA_UNINSTALL = uninstall-man + +CFLAGS += -Wall -O2 +CFLAGS += -DPFRUT_HEADER='"../../../../../include/uapi/linux/pfrut.h"' +LDFLAGS += -luuid + +TOOL_OBJS = \ + pfrut.o + +include ../../Makefile.rules + +install-man: $(srctree)/man/pfrut.8 + $(ECHO) " INST " pfrut.8 + $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/pfrut.8 +uninstall-man: + $(ECHO) " UNINST " pfrut.8 + $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/pfrut.8 diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c new file mode 100644 index 000000000000..d79c335594b2 --- /dev/null +++ b/tools/power/acpi/tools/pfrut/pfrut.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Platform Firmware Runtime Update tool to do Management + * Mode code injection/driver update and telemetry retrieval. + * + * This tool uses the interfaces provided by pfr_update and + * pfr_telemetry drivers. These interfaces are exposed via + * /dev/pfr_update and /dev/pfr_telemetry. Write operation + * on the /dev/pfr_update is to load the EFI capsule into + * kernel space. Mmap/read operations on /dev/pfr_telemetry + * could be used to read the telemetry data to user space. 
+ */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <getopt.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <uuid/uuid.h> +#include PFRUT_HEADER + +char *capsule_name; +int action, query_cap, log_type, log_level, log_read, log_getinfo, + revid, log_revid; +int set_log_level, set_log_type, + set_revid, set_log_revid; + +char *progname; + +#define LOG_ERR 0 +#define LOG_WARN 1 +#define LOG_INFO 2 +#define LOG_VERB 4 +#define LOG_EXEC_IDX 0 +#define LOG_HISTORY_IDX 1 +#define REVID_1 1 +#define REVID_2 2 + +static int valid_log_level(int level) +{ + return level == LOG_ERR || level == LOG_WARN || + level == LOG_INFO || level == LOG_VERB; +} + +static int valid_log_type(int type) +{ + return type == LOG_EXEC_IDX || type == LOG_HISTORY_IDX; +} + +static inline int valid_log_revid(int id) +{ + return id == REVID_1 || id == REVID_2; +} + +static void help(void) +{ + fprintf(stderr, + "usage: %s [OPTIONS]\n" + " code injection:\n" + " -l, --load\n" + " -s, --stage\n" + " -a, --activate\n" + " -u, --update [stage and activate]\n" + " -q, --query\n" + " -d, --revid update\n" + " telemetry:\n" + " -G, --getloginfo\n" + " -T, --type(0:execution, 1:history)\n" + " -L, --level(0, 1, 2, 4)\n" + " -R, --read\n" + " -D, --revid log\n", + progname); +} + +char *option_string = "l:sauqd:GT:L:RD:h"; +static struct option long_options[] = { + {"load", required_argument, 0, 'l'}, + {"stage", no_argument, 0, 's'}, + {"activate", no_argument, 0, 'a'}, + {"update", no_argument, 0, 'u'}, + {"query", no_argument, 0, 'q'}, + {"getloginfo", no_argument, 0, 'G'}, + {"type", required_argument, 0, 'T'}, + {"level", required_argument, 0, 'L'}, + {"read", no_argument, 0, 'R'}, + {"setrev", required_argument, 0, 'd'}, + {"setrevlog", required_argument, 0, 'D'}, + {"help", no_argument, 0, 'h'}, + {} +}; + +static void parse_options(int argc, char **argv) +{ 
+ int option_index = 0; + char *pathname; + int opt; + + pathname = strdup(argv[0]); + progname = basename(pathname); + + while ((opt = getopt_long_only(argc, argv, option_string, + long_options, &option_index)) != -1) { + switch (opt) { + case 'l': + capsule_name = optarg; + break; + case 's': + action = 1; + break; + case 'a': + action = 2; + break; + case 'u': + action = 3; + break; + case 'q': + query_cap = 1; + break; + case 'G': + log_getinfo = 1; + break; + case 'T': + log_type = atoi(optarg); + set_log_type = 1; + break; + case 'L': + log_level = atoi(optarg); + set_log_level = 1; + break; + case 'R': + log_read = 1; + break; + case 'd': + revid = atoi(optarg); + set_revid = 1; + break; + case 'D': + log_revid = atoi(optarg); + set_log_revid = 1; + break; + case 'h': + help(); + exit(0); + default: + break; + } + } +} + +void print_cap(struct pfru_update_cap_info *cap) +{ + char *uuid; + + uuid = malloc(37); + if (!uuid) { + perror("Can not allocate uuid buffer\n"); + exit(1); + } + + uuid_unparse(cap->code_type, uuid); + printf("code injection image type:%s\n", uuid); + printf("fw_version:%d\n", cap->fw_version); + printf("code_rt_version:%d\n", cap->code_rt_version); + + uuid_unparse(cap->drv_type, uuid); + printf("driver update image type:%s\n", uuid); + printf("drv_rt_version:%d\n", cap->drv_rt_version); + printf("drv_svn:%d\n", cap->drv_svn); + + uuid_unparse(cap->platform_id, uuid); + printf("platform id:%s\n", uuid); + uuid_unparse(cap->oem_id, uuid); + printf("oem id:%s\n", uuid); + printf("oem information length:%d\n", cap->oem_info_len); + + free(uuid); +} + +int main(int argc, char *argv[]) +{ + int fd_update, fd_update_log, fd_capsule; + struct pfrt_log_data_info data_info; + struct pfrt_log_info info; + struct pfru_update_cap_info cap; + void *addr_map_capsule; + struct stat st; + char *log_buf; + int ret = 0; + + if (getuid() != 0) { + printf("Please run the tool as root - Exiting.\n"); + return 1; + } + + parse_options(argc, argv); + + 
fd_update = open("/dev/acpi_pfr_update0", O_RDWR); + if (fd_update < 0) { + printf("PFRU device not supported - Quit...\n"); + return 1; + } + + fd_update_log = open("/dev/acpi_pfr_telemetry0", O_RDWR); + if (fd_update_log < 0) { + printf("PFRT device not supported - Quit...\n"); + return 1; + } + + if (query_cap) { + ret = ioctl(fd_update, PFRU_IOC_QUERY_CAP, &cap); + if (ret) + perror("Query Update Capability info failed."); + else + print_cap(&cap); + + close(fd_update); + close(fd_update_log); + + return ret; + } + + if (log_getinfo) { + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info); + if (ret) { + perror("Get telemetry data info failed."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_INFO, &info); + if (ret) { + perror("Get telemetry info failed."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + printf("log_level:%d\n", info.log_level); + printf("log_type:%d\n", info.log_type); + printf("log_revid:%d\n", info.log_revid); + printf("max_data_size:%d\n", data_info.max_data_size); + printf("chunk1_size:%d\n", data_info.chunk1_size); + printf("chunk2_size:%d\n", data_info.chunk2_size); + printf("rollover_cnt:%d\n", data_info.rollover_cnt); + printf("reset_cnt:%d\n", data_info.reset_cnt); + + return 0; + } + + info.log_level = -1; + info.log_type = -1; + info.log_revid = -1; + + if (set_log_level) { + if (!valid_log_level(log_level)) { + printf("Invalid log level %d\n", + log_level); + } else { + info.log_level = log_level; + } + } + + if (set_log_type) { + if (!valid_log_type(log_type)) { + printf("Invalid log type %d\n", + log_type); + } else { + info.log_type = log_type; + } + } + + if (set_log_revid) { + if (!valid_log_revid(log_revid)) { + printf("Invalid log revid %d, unchanged.\n", + log_revid); + } else { + info.log_revid = log_revid; + } + } + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_SET_INFO, &info); + if (ret) { + perror("Log information set 
failed.(log_level, log_type, log_revid)"); + close(fd_update); + close(fd_update_log); + + return 1; + } + + if (set_revid) { + ret = ioctl(fd_update, PFRU_IOC_SET_REV, &revid); + if (ret) { + perror("pfru update revid set failed"); + close(fd_update); + close(fd_update_log); + + return 1; + } + + printf("pfru update revid set to %d\n", revid); + } + + if (capsule_name) { + fd_capsule = open(capsule_name, O_RDONLY); + if (fd_capsule < 0) { + perror("Can not open capsule file..."); + close(fd_update); + close(fd_update_log); + + return 1; + } + + if (fstat(fd_capsule, &st) < 0) { + perror("Can not fstat capsule file..."); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + addr_map_capsule = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, + fd_capsule, 0); + if (addr_map_capsule == MAP_FAILED) { + perror("Failed to mmap capsule file."); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + ret = write(fd_update, (char *)addr_map_capsule, st.st_size); + printf("Load %d bytes of capsule file into the system\n", + ret); + + if (ret == -1) { + perror("Failed to load capsule file"); + close(fd_capsule); + close(fd_update); + close(fd_update_log); + + return 1; + } + + munmap(addr_map_capsule, st.st_size); + close(fd_capsule); + printf("Load done.\n"); + } + + if (action) { + if (action == 1) { + ret = ioctl(fd_update, PFRU_IOC_STAGE, NULL); + } else if (action == 2) { + ret = ioctl(fd_update, PFRU_IOC_ACTIVATE, NULL); + } else if (action == 3) { + ret = ioctl(fd_update, PFRU_IOC_STAGE_ACTIVATE, NULL); + } else { + close(fd_update); + close(fd_update_log); + + return 1; + } + printf("Update finished, return %d\n", ret); + } + + close(fd_update); + + if (log_read) { + void *p_mmap; + int max_data_sz; + + ret = ioctl(fd_update_log, PFRT_LOG_IOC_GET_DATA_INFO, &data_info); + if (ret) { + perror("Get telemetry data info failed."); + close(fd_update_log); + + return 1; + } + + max_data_sz = 
data_info.max_data_size; + if (!max_data_sz) { + printf("No telemetry data available.\n"); + close(fd_update_log); + + return 1; + } + + log_buf = malloc(max_data_sz + 1); + if (!log_buf) { + perror("log_buf allocate failed."); + close(fd_update_log); + + return 1; + } + + p_mmap = mmap(NULL, max_data_sz, PROT_READ, MAP_SHARED, fd_update_log, 0); + if (p_mmap == MAP_FAILED) { + perror("mmap error."); + close(fd_update_log); + + return 1; + } + + memcpy(log_buf, p_mmap, max_data_sz); + log_buf[max_data_sz] = '\0'; + printf("%s\n", log_buf); + free(log_buf); + + munmap(p_mmap, max_data_sz); + } + + close(fd_update_log); + + return 0; +} diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index bf9fd3549a1d..efe72fa48224 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -15,7 +15,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.10"; +static const char *version_str = "v1.11"; static const int supported_api_ver = 1; static struct isst_if_platform_info isst_platform_info; static char *progname; @@ -1599,6 +1599,7 @@ static void set_scaling_min_to_cpuinfo_max(int cpu) die_id != get_physical_die_id(i)) continue; + adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); adjust_scaling_min_from_base_freq(i); } @@ -1615,6 +1616,7 @@ static void set_scaling_min_to_cpuinfo_min(int cpu) die_id != get_physical_die_id(i)) continue; + adjust_scaling_max_from_base_freq(i); set_cpufreq_scaling_min_max_from_cpuinfo(i, 0, 0); } } diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 071312f5eb92..b0be5f40a3f1 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -87,7 +87,18 @@ LLVM_STRIP ?= llvm-strip ifeq ($(CC_NO_CLANG), 1) EXTRA_WARNINGS += -Wstrict-aliasing=3 -endif + +else ifneq ($(CROSS_COMPILE),) +CLANG_CROSS_FLAGS := 
--target=$(notdir $(CROSS_COMPILE:%-=%)) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc)) +ifneq ($(GCC_TOOLCHAIN_DIR),) +CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) +CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) +CLANG_CROSS_FLAGS += --gcc-toolchain=$(realpath $(GCC_TOOLCHAIN_DIR)/..) +endif # GCC_TOOLCHAIN_DIR +CFLAGS += $(CLANG_CROSS_FLAGS) +AFLAGS += $(CLANG_CROSS_FLAGS) +endif # CROSS_COMPILE # Hack to avoid type-punned warnings on old systems such as RHEL5: # We should be changing CFLAGS and checking gcc version, but this diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 86deba8308a1..1acdf2fc31c5 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +ldflags-y += --wrap=acpi_table_parse_cedt ldflags-y += --wrap=is_acpi_device_node -ldflags-y += --wrap=acpi_get_table -ldflags-y += --wrap=acpi_put_table ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=pci_walk_bus diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index cb32f9e27d5d..736d99006fb7 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -182,6 +182,13 @@ static struct { }, }; +struct acpi_cedt_cfmws *mock_cfmws[4] = { + [0] = &mock_cedt.cfmws0.cfmws, + [1] = &mock_cedt.cfmws1.cfmws, + [2] = &mock_cedt.cfmws2.cfmws, + [3] = &mock_cedt.cfmws3.cfmws, +}; + struct cxl_mock_res { struct list_head list; struct range range; @@ -232,12 +239,6 @@ static struct cxl_mock_res *alloc_mock_res(resource_size_t size) static int populate_cedt(void) { - struct acpi_cedt_cfmws *cfmws[4] = { - [0] = &mock_cedt.cfmws0.cfmws, - [1] = &mock_cedt.cfmws1.cfmws, - [2] = &mock_cedt.cfmws2.cfmws, - [3] = &mock_cedt.cfmws3.cfmws, - }; struct cxl_mock_res *res; int i; @@ -257,8 +258,8 @@ static int populate_cedt(void) chbs->length = size; } - for (i = 0; i < 
ARRAY_SIZE(cfmws); i++) { - struct acpi_cedt_cfmws *window = cfmws[i]; + for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + struct acpi_cedt_cfmws *window = mock_cfmws[i]; res = alloc_mock_res(window->window_size); if (!res) @@ -269,21 +270,44 @@ static int populate_cedt(void) return 0; } -static acpi_status mock_acpi_get_table(char *signature, u32 instance, - struct acpi_table_header **out_table) +/* + * WARNING, this hack assumes the format of 'struct + * cxl_cfmws_context' and 'struct cxl_chbs_context' share the property that + * the first struct member is the device being probed by the cxl_acpi + * driver. + */ +struct cxl_cedt_context { + struct device *dev; +}; + +static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, + void *arg) { - if (instance < U32_MAX || strcmp(signature, ACPI_SIG_CEDT) != 0) - return acpi_get_table(signature, instance, out_table); + struct cxl_cedt_context *ctx = arg; + struct device *dev = ctx->dev; + union acpi_subtable_headers *h; + unsigned long end; + int i; - *out_table = (struct acpi_table_header *) &mock_cedt; - return AE_OK; -} + if (dev != &cxl_acpi->dev) + return acpi_table_parse_cedt(id, handler_arg, arg); -static void mock_acpi_put_table(struct acpi_table_header *table) -{ - if (table == (struct acpi_table_header *) &mock_cedt) - return; - acpi_put_table(table); + if (id == ACPI_CEDT_TYPE_CHBS) + for (i = 0; i < ARRAY_SIZE(mock_cedt.chbs); i++) { + h = (union acpi_subtable_headers *)&mock_cedt.chbs[i]; + end = (unsigned long)&mock_cedt.chbs[i + 1]; + handler_arg(h, arg, end); + } + + if (id == ACPI_CEDT_TYPE_CFMWS) + for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + h = (union acpi_subtable_headers *) mock_cfmws[i]; + end = (unsigned long) h + mock_cfmws[i]->header.length; + handler_arg(h, arg, end); + } + + return 0; } static bool is_mock_bridge(struct device *dev) @@ -388,8 +412,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .is_mock_port = is_mock_port, .is_mock_dev = 
is_mock_dev, .mock_port = mock_cxl_root_port, - .acpi_get_table = mock_acpi_get_table, - .acpi_put_table = mock_acpi_put_table, + .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, .list = LIST_HEAD_INIT(cxl_mock_ops.list), @@ -574,3 +597,4 @@ static __exit void cxl_test_exit(void) module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(ACPI); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 12a8437a9ca0..8c2086c4caef 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -28,8 +28,24 @@ static struct cxl_cel_entry mock_cel[] = { .opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA), .effect = cpu_to_le16(EFFECT(1) | EFFECT(2)), }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO), + .effect = cpu_to_le16(0), + }, }; +/* See CXL 2.0 Table 181 Get Health Info Output Payload */ +struct cxl_mbox_health_info { + u8 health_status; + u8 media_status; + u8 ext_status; + u8 life_used; + __le16 temperature; + __le32 dirty_shutdowns; + __le32 volatile_errors; + __le32 pmem_errors; +} __packed; + static struct { struct cxl_mbox_get_supported_logs gsl; struct cxl_gsl_entry entry; @@ -54,7 +70,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd) return 0; } -static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_log *gl = cmd->payload_in; u32 offset = le32_to_cpu(gl->offset); @@ -64,7 +80,7 @@ static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) if (cmd->size_in < sizeof(*gl)) return -EINVAL; - if (length > cxlm->payload_size) + if (length > cxlds->payload_size) return -EINVAL; if (offset + length > sizeof(mock_cel)) return -EINVAL; @@ -78,9 +94,9 @@ static int mock_get_log(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int 
mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { - struct platform_device *pdev = to_platform_device(cxlm->dev); + struct platform_device *pdev = to_platform_device(cxlds->dev); struct cxl_mbox_identify id = { .fw_revision = { "mock fw v1 " }, .lsa_size = cpu_to_le32(LSA_SIZE), @@ -120,10 +136,10 @@ static int mock_id(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlm->dev); + void *lsa = dev_get_drvdata(cxlds->dev); u32 offset, length; if (sizeof(*get_lsa) > cmd->size_in) @@ -139,10 +155,10 @@ static int mock_get_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlm->dev); + void *lsa = dev_get_drvdata(cxlds->dev); u32 offset, length; if (sizeof(*set_lsa) > cmd->size_in) @@ -156,9 +172,39 @@ static int mock_set_lsa(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) return 0; } -static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) +static int mock_health_info(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) { - struct device *dev = cxlm->dev; + struct cxl_mbox_health_info health_info = { + /* set flags for maint needed, perf degraded, hw replacement */ + .health_status = 0x7, + /* set media status to "All Data Lost" */ + .media_status = 0x3, + /* + * set ext_status flags for: + * ext_life_used: normal, + * ext_temperature: critical, + * ext_corrected_volatile: warning, + * ext_corrected_persistent: normal, + */ + .ext_status = 0x18, + .life_used = 15, + 
.temperature = cpu_to_le16(25), + .dirty_shutdowns = cpu_to_le32(10), + .volatile_errors = cpu_to_le32(20), + .pmem_errors = cpu_to_le32(30), + }; + + if (cmd->size_out < sizeof(health_info)) + return -EINVAL; + + memcpy(cmd->payload_out, &health_info, sizeof(health_info)); + return 0; +} + +static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct device *dev = cxlds->dev; int rc = -EIO; switch (cmd->opcode) { @@ -166,16 +212,19 @@ static int cxl_mock_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd) rc = mock_gsl(cmd); break; case CXL_MBOX_OP_GET_LOG: - rc = mock_get_log(cxlm, cmd); + rc = mock_get_log(cxlds, cmd); break; case CXL_MBOX_OP_IDENTIFY: - rc = mock_id(cxlm, cmd); + rc = mock_id(cxlds, cmd); break; case CXL_MBOX_OP_GET_LSA: - rc = mock_get_lsa(cxlm, cmd); + rc = mock_get_lsa(cxlds, cmd); break; case CXL_MBOX_OP_SET_LSA: - rc = mock_set_lsa(cxlm, cmd); + rc = mock_set_lsa(cxlds, cmd); + break; + case CXL_MBOX_OP_GET_HEALTH_INFO: + rc = mock_health_info(cxlds, cmd); break; default: break; @@ -196,7 +245,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct cxl_memdev *cxlmd; - struct cxl_mem *cxlm; + struct cxl_dev_state *cxlds; void *lsa; int rc; @@ -208,30 +257,30 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) return rc; dev_set_drvdata(dev, lsa); - cxlm = cxl_mem_create(dev); - if (IS_ERR(cxlm)) - return PTR_ERR(cxlm); + cxlds = cxl_dev_state_create(dev); + if (IS_ERR(cxlds)) + return PTR_ERR(cxlds); - cxlm->mbox_send = cxl_mock_mbox_send; - cxlm->payload_size = SZ_4K; + cxlds->mbox_send = cxl_mock_mbox_send; + cxlds->payload_size = SZ_4K; - rc = cxl_mem_enumerate_cmds(cxlm); + rc = cxl_enumerate_cmds(cxlds); if (rc) return rc; - rc = cxl_mem_identify(cxlm); + rc = cxl_dev_state_identify(cxlds); if (rc) return rc; - rc = cxl_mem_create_range_info(cxlm); + rc = cxl_mem_create_range_info(cxlds); if (rc) return rc; - cxlmd = 
devm_cxl_add_memdev(cxlm); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) + if (range_len(&cxlds->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM)) rc = devm_cxl_add_nvdimm(dev, cxlmd); return 0; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index b8c108abcf07..17408f892df4 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -58,36 +58,23 @@ bool __wrap_is_acpi_device_node(const struct fwnode_handle *fwnode) } EXPORT_SYMBOL(__wrap_is_acpi_device_node); -acpi_status __wrap_acpi_get_table(char *signature, u32 instance, - struct acpi_table_header **out_table) +int __wrap_acpi_table_parse_cedt(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, + void *arg) { - int index; + int index, rc; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - acpi_status status; if (ops) - status = ops->acpi_get_table(signature, instance, out_table); + rc = ops->acpi_table_parse_cedt(id, handler_arg, arg); else - status = acpi_get_table(signature, instance, out_table); + rc = acpi_table_parse_cedt(id, handler_arg, arg); put_cxl_mock_ops(index); - return status; -} -EXPORT_SYMBOL(__wrap_acpi_get_table); - -void __wrap_acpi_put_table(struct acpi_table_header *table) -{ - int index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops) - ops->acpi_put_table(table); - else - acpi_put_table(table); - put_cxl_mock_ops(index); + return rc; } -EXPORT_SYMBOL(__wrap_acpi_put_table); +EXPORT_SYMBOL_NS_GPL(__wrap_acpi_table_parse_cedt, ACPI); acpi_status __wrap_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, @@ -169,3 +156,4 @@ __wrap_nvdimm_bus_register(struct device *dev, EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(ACPI); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 805a94cb3fbe..15ed0fd877e4 100644 --- 
a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -6,9 +6,9 @@ struct cxl_mock_ops { struct list_head list; bool (*is_mock_adev)(struct acpi_device *dev); - acpi_status (*acpi_get_table)(char *signature, u32 instance, - struct acpi_table_header **out_table); - void (*acpi_put_table)(struct acpi_table_header *table); + int (*acpi_table_parse_cedt)(enum acpi_cedt_type id, + acpi_tbl_entry_handler_arg handler_arg, + void *arg); bool (*is_mock_bridge)(struct device *dev); acpi_status (*acpi_evaluate_integer)(acpi_handle handle, acpi_string pathname, diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 68e6f461c758..7a706f96f68d 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -15,38 +15,57 @@ import time assert sys.version_info >= (3, 7), "Python version is too old" -from collections import namedtuple +from dataclasses import dataclass from enum import Enum, auto -from typing import Iterable, Sequence, List +from typing import Any, Iterable, Sequence, List, Optional import kunit_json import kunit_kernel import kunit_parser -KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time']) - -KunitConfigRequest = namedtuple('KunitConfigRequest', - ['build_dir', 'make_options']) -KunitBuildRequest = namedtuple('KunitBuildRequest', - ['jobs', 'build_dir', 'alltests', - 'make_options']) -KunitExecRequest = namedtuple('KunitExecRequest', - ['timeout', 'build_dir', 'alltests', - 'filter_glob', 'kernel_args', 'run_isolated']) -KunitParseRequest = namedtuple('KunitParseRequest', - ['raw_output', 'build_dir', 'json']) -KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', 'filter_glob', - 'kernel_args', 'run_isolated', 'json', 'make_options']) - -KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] - class KunitStatus(Enum): SUCCESS = auto() CONFIG_FAILURE = auto() BUILD_FAILURE = auto() TEST_FAILURE = auto() +@dataclass +class 
KunitResult: + status: KunitStatus + result: Any + elapsed_time: float + +@dataclass +class KunitConfigRequest: + build_dir: str + make_options: Optional[List[str]] + +@dataclass +class KunitBuildRequest(KunitConfigRequest): + jobs: int + alltests: bool + +@dataclass +class KunitParseRequest: + raw_output: Optional[str] + build_dir: str + json: Optional[str] + +@dataclass +class KunitExecRequest(KunitParseRequest): + timeout: int + alltests: bool + filter_glob: str + kernel_args: Optional[List[str]] + run_isolated: Optional[str] + +@dataclass +class KunitRequest(KunitExecRequest, KunitBuildRequest): + pass + + +KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] + def get_kernel_root_path() -> str: path = sys.argv[0] if not __file__ else __file__ parts = os.path.realpath(path).split('tools/testing/kunit') @@ -91,6 +110,14 @@ def build_tests(linux: kunit_kernel.LinuxSourceTree, 'built kernel successfully', build_end - build_start) +def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree, + request: KunitBuildRequest) -> KunitResult: + config_result = config_tests(linux, request) + if config_result.status != KunitStatus.SUCCESS: + return config_result + + return build_tests(linux, request) + def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]: args = ['kunit.action=list'] if request.kernel_args: @@ -121,8 +148,7 @@ def _suites_from_test_list(tests: List[str]) -> List[str]: -def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest, - parse_request: KunitParseRequest) -> KunitResult: +def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult: filter_globs = [request.filter_glob] if request.run_isolated: tests = _list_tests(linux, request) @@ -147,17 +173,23 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest, filter_glob=filter_glob, build_dir=request.build_dir) - result = parse_tests(parse_request, run_result) + result = 
parse_tests(request, run_result) # run_kernel() doesn't block on the kernel exiting. # That only happens after we get the last line of output from `run_result`. # So exec_time here actually contains parsing + execution time, which is fine. test_end = time.time() exec_time += test_end - test_start - test_counts.add_subtest_counts(result.result.test.counts) + test_counts.add_subtest_counts(result.result.counts) + + if len(filter_globs) == 1 and test_counts.crashed > 0: + bd = request.build_dir + print('The kernel seems to have crashed; you can decode the stack traces with:') + print('$ scripts/decode_stacktrace.sh {}/vmlinux {} < {} | tee {}/decoded.log | {} parse'.format( + bd, bd, kunit_kernel.get_outfile_path(bd), bd, sys.argv[0])) kunit_status = _map_to_overall_status(test_counts.get_status()) - return KunitResult(status=kunit_status, result=result.result, elapsed_time=exec_time) + return KunitResult(status=kunit_status, result=result, elapsed_time=exec_time) def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: if test_status in (kunit_parser.TestStatus.SUCCESS, kunit_parser.TestStatus.SKIPPED): @@ -168,14 +200,12 @@ def _map_to_overall_status(test_status: kunit_parser.TestStatus) -> KunitStatus: def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitResult: parse_start = time.time() - test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, - kunit_parser.Test(), - 'Tests not Parsed.') + test_result = kunit_parser.Test() if request.raw_output: # Treat unparsed results as one passing test. 
- test_result.test.status = kunit_parser.TestStatus.SUCCESS - test_result.test.counts.passed = 1 + test_result.status = kunit_parser.TestStatus.SUCCESS + test_result.counts.passed = 1 output: Iterable[str] = input_data if request.raw_output == 'all': @@ -193,7 +223,7 @@ def parse_tests(request: KunitParseRequest, input_data: Iterable[str]) -> KunitR if request.json: json_obj = kunit_json.get_json_result( - test_result=test_result, + test=test_result, def_config='kunit_defconfig', build_dir=request.build_dir, json_path=request.json) @@ -211,27 +241,15 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitRequest) -> KunitResult: run_start = time.time() - config_request = KunitConfigRequest(request.build_dir, - request.make_options) - config_result = config_tests(linux, config_request) + config_result = config_tests(linux, request) if config_result.status != KunitStatus.SUCCESS: return config_result - build_request = KunitBuildRequest(request.jobs, request.build_dir, - request.alltests, - request.make_options) - build_result = build_tests(linux, build_request) + build_result = build_tests(linux, request) if build_result.status != KunitStatus.SUCCESS: return build_result - exec_request = KunitExecRequest(request.timeout, request.build_dir, - request.alltests, request.filter_glob, - request.kernel_args, request.run_isolated) - parse_request = KunitParseRequest(request.raw_output, - request.build_dir, - request.json) - - exec_result = exec_tests(linux, exec_request, parse_request) + exec_result = exec_tests(linux, request) run_end = time.time() @@ -264,6 +282,9 @@ def massage_argv(argv: Sequence[str]) -> Sequence[str]: return f'{arg}={pseudo_bool_flag_defaults[arg]}' return list(map(massage_arg, argv)) +def get_default_jobs() -> int: + return len(os.sched_getaffinity(0)) + def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' @@ -280,6 +301,10 @@ def add_common_opts(parser) -> None: ' 
If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' 'will get automatically appended.', metavar='kunitconfig') + parser.add_argument('--kconfig_add', + help='Additional Kconfig options to append to the ' + '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.', + action='append') parser.add_argument('--arch', help=('Specifies the architecture to run tests under. ' @@ -310,7 +335,7 @@ def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' 'jobs (commands) to run simultaneously."', - type=int, default=8, metavar='jobs') + type=int, default=get_default_jobs(), metavar='jobs') def add_exec_opts(parser) -> None: parser.add_argument('--timeout', @@ -398,20 +423,21 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitRequest(cli_args.raw_output, - cli_args.timeout, - cli_args.jobs, - cli_args.build_dir, - cli_args.alltests, - cli_args.filter_glob, - cli_args.kernel_args, - cli_args.run_isolated, - cli_args.json, - cli_args.make_options) + request = KunitRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options, + jobs=cli_args.jobs, + alltests=cli_args.alltests, + raw_output=cli_args.raw_output, + json=cli_args.json, + timeout=cli_args.timeout, + filter_glob=cli_args.filter_glob, + kernel_args=cli_args.kernel_args, + run_isolated=cli_args.run_isolated) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) @@ -423,12 +449,13 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = 
KunitConfigRequest(cli_args.build_dir, - cli_args.make_options) + request = KunitConfigRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options) result = config_tests(linux, request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( @@ -439,15 +466,16 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - request = KunitBuildRequest(cli_args.jobs, - cli_args.build_dir, - cli_args.alltests, - cli_args.make_options) - result = build_tests(linux, request) + request = KunitBuildRequest(build_dir=cli_args.build_dir, + make_options=cli_args.make_options, + jobs=cli_args.jobs, + alltests=cli_args.alltests) + result = config_and_build_tests(linux, request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( result.elapsed_time)) @@ -457,20 +485,20 @@ def main(argv, linux=None): if not linux: linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, kunitconfig_path=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, arch=cli_args.arch, cross_compile=cli_args.cross_compile, qemu_config_path=cli_args.qemu_config) - exec_request = KunitExecRequest(cli_args.timeout, - cli_args.build_dir, - cli_args.alltests, - cli_args.filter_glob, - cli_args.kernel_args, - cli_args.run_isolated) - parse_request = KunitParseRequest(cli_args.raw_output, - cli_args.build_dir, - cli_args.json) - result = exec_tests(linux, exec_request, parse_request) + exec_request = KunitExecRequest(raw_output=cli_args.raw_output, + build_dir=cli_args.build_dir, + json=cli_args.json, + timeout=cli_args.timeout, + alltests=cli_args.alltests, + filter_glob=cli_args.filter_glob, + kernel_args=cli_args.kernel_args, + run_isolated=cli_args.run_isolated) + result = exec_tests(linux, exec_request) kunit_parser.print_with_timestamp(( 'Elapsed time: 
%.3fs\n') % (result.elapsed_time)) if result.status != KunitStatus.SUCCESS: @@ -482,9 +510,9 @@ def main(argv, linux=None): else: with open(cli_args.file, 'r', errors='backslashreplace') as f: kunit_output = f.read().splitlines() - request = KunitParseRequest(cli_args.raw_output, - None, - cli_args.json) + request = KunitParseRequest(raw_output=cli_args.raw_output, + build_dir='', + json=cli_args.json) result = parse_tests(request, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index c77c7d2ef622..677354546156 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -62,33 +62,34 @@ class Kconfig(object): for entry in self.entries(): f.write(str(entry) + '\n') - def parse_from_string(self, blob: str) -> None: - """Parses a string containing KconfigEntrys and populates this Kconfig.""" - self._entries = [] - is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN) - config_matcher = re.compile(CONFIG_PATTERN) - for line in blob.split('\n'): - line = line.strip() - if not line: - continue - - match = config_matcher.match(line) - if match: - entry = KconfigEntry(match.group(1), match.group(2)) - self.add_entry(entry) - continue - - empty_match = is_not_set_matcher.match(line) - if empty_match: - entry = KconfigEntry(empty_match.group(1), 'n') - self.add_entry(entry) - continue - - if line[0] == '#': - continue - else: - raise KconfigParseError('Failed to parse: ' + line) - - def read_from_file(self, path: str) -> None: - with open(path, 'r') as f: - self.parse_from_string(f.read()) +def parse_file(path: str) -> Kconfig: + with open(path, 'r') as f: + return parse_from_string(f.read()) + +def parse_from_string(blob: str) -> Kconfig: + """Parses a string containing Kconfig entries.""" + kconfig = Kconfig() + is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN) + config_matcher = re.compile(CONFIG_PATTERN) + for line in 
blob.split('\n'): + line = line.strip() + if not line: + continue + + match = config_matcher.match(line) + if match: + entry = KconfigEntry(match.group(1), match.group(2)) + kconfig.add_entry(entry) + continue + + empty_match = is_not_set_matcher.match(line) + if empty_match: + entry = KconfigEntry(empty_match.group(1), 'n') + kconfig.add_entry(entry) + continue + + if line[0] == '#': + continue + else: + raise KconfigParseError('Failed to parse: ' + line) + return kconfig diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 746bec72b9ac..6862671709bc 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -11,7 +11,7 @@ import os import kunit_parser -from kunit_parser import Test, TestResult, TestStatus +from kunit_parser import Test, TestStatus from typing import Any, Dict, Optional JsonObj = Dict[str, Any] @@ -30,6 +30,8 @@ def _get_group_json(test: Test, def_config: str, test_case = {"name": subtest.name, "status": "FAIL"} if subtest.status == TestStatus.SUCCESS: test_case["status"] = "PASS" + elif subtest.status == TestStatus.SKIPPED: + test_case["status"] = "SKIP" elif subtest.status == TestStatus.TEST_CRASHED: test_case["status"] = "ERROR" test_cases.append(test_case) @@ -48,9 +50,9 @@ def _get_group_json(test: Test, def_config: str, } return test_group -def get_json_result(test_result: TestResult, def_config: str, +def get_json_result(test: Test, def_config: str, build_dir: Optional[str], json_path: str) -> str: - test_group = _get_group_json(test_result.test, def_config, build_dir) + test_group = _get_group_json(test, def_config, build_dir) test_group["name"] = "KUnit Test Group" json_obj = json.dumps(test_group, indent=4) if json_path != 'stdout': diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 66095568bf32..44bbe54f25f1 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -21,6 +21,7 @@ import 
qemu_config KCONFIG_PATH = '.config' KUNITCONFIG_PATH = '.kunitconfig' +OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' OUTFILE_PATH = 'test.log' @@ -116,8 +117,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._extra_qemu_params = qemu_arch_params.extra_qemu_params def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None: - kconfig = kunit_config.Kconfig() - kconfig.parse_from_string(self._kconfig) + kconfig = kunit_config.parse_from_string(self._kconfig) base_kunitconfig.merge_in_entries(kconfig) def start(self, params: List[str], build_dir: str) -> subprocess.Popen: @@ -180,6 +180,9 @@ def get_kconfig_path(build_dir) -> str: def get_kunitconfig_path(build_dir) -> str: return get_file_path(build_dir, KUNITCONFIG_PATH) +def get_old_kunitconfig_path(build_dir) -> str: + return get_file_path(build_dir, OLD_KUNITCONFIG_PATH) + def get_outfile_path(build_dir) -> str: return get_file_path(build_dir, OUTFILE_PATH) @@ -206,6 +209,7 @@ def get_source_tree_ops_from_qemu_config(config_path: str, # exists as a file. module_path = '.' + os.path.join(os.path.basename(QEMU_CONFIGS_DIR), os.path.basename(config_path)) spec = importlib.util.spec_from_file_location(module_path, config_path) + assert spec is not None config = importlib.util.module_from_spec(spec) # See https://github.com/python/typeshed/pull/2626 for context. 
assert isinstance(spec.loader, importlib.abc.Loader) @@ -225,6 +229,7 @@ class LinuxSourceTree(object): build_dir: str, load_config=True, kunitconfig_path='', + kconfig_add: Optional[List[str]]=None, arch=None, cross_compile=None, qemu_config_path=None) -> None: @@ -249,8 +254,11 @@ class LinuxSourceTree(object): if not os.path.exists(kunitconfig_path): shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) - self._kconfig = kunit_config.Kconfig() - self._kconfig.read_from_file(kunitconfig_path) + self._kconfig = kunit_config.parse_file(kunitconfig_path) + if kconfig_add: + kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add)) + self._kconfig.merge_in_entries(kconfig) + def clean(self) -> bool: try: @@ -262,17 +270,18 @@ class LinuxSourceTree(object): def validate_config(self, build_dir) -> bool: kconfig_path = get_kconfig_path(build_dir) - validated_kconfig = kunit_config.Kconfig() - validated_kconfig.read_from_file(kconfig_path) - if not self._kconfig.is_subset_of(validated_kconfig): - invalid = self._kconfig.entries() - validated_kconfig.entries() - message = 'Provided Kconfig is not contained in validated .config. Following fields found in kunitconfig, ' \ - 'but not in .config: %s' % ( - ', '.join([str(e) for e in invalid]) - ) - logging.error(message) - return False - return True + validated_kconfig = kunit_config.parse_file(kconfig_path) + if self._kconfig.is_subset_of(validated_kconfig): + return True + invalid = self._kconfig.entries() - validated_kconfig.entries() + message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \ + 'This is probably due to unsatisfied dependencies.\n' \ + 'Missing: ' + ', '.join([str(e) for e in invalid]) + if self._arch == 'um': + message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \ + 'on a different architecture with something like "--arch=x86_64".' 
+ logging.error(message) + return False def build_config(self, build_dir, make_options) -> bool: kconfig_path = get_kconfig_path(build_dir) @@ -285,25 +294,38 @@ class LinuxSourceTree(object): except ConfigError as e: logging.error(e) return False - return self.validate_config(build_dir) + if not self.validate_config(build_dir): + return False + + old_path = get_old_kunitconfig_path(build_dir) + if os.path.exists(old_path): + os.remove(old_path) # write_to_file appends to the file + self._kconfig.write_to_file(old_path) + return True + + def _kunitconfig_changed(self, build_dir: str) -> bool: + old_path = get_old_kunitconfig_path(build_dir) + if not os.path.exists(old_path): + return True + + old_kconfig = kunit_config.parse_file(old_path) + return old_kconfig.entries() != self._kconfig.entries() def build_reconfig(self, build_dir, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" kconfig_path = get_kconfig_path(build_dir) - if os.path.exists(kconfig_path): - existing_kconfig = kunit_config.Kconfig() - existing_kconfig.read_from_file(kconfig_path) - self._ops.make_arch_qemuconfig(self._kconfig) - if not self._kconfig.is_subset_of(existing_kconfig): - print('Regenerating .config ...') - os.remove(kconfig_path) - return self.build_config(build_dir, make_options) - else: - return True - else: + if not os.path.exists(kconfig_path): print('Generating .config ...') return self.build_config(build_dir, make_options) + existing_kconfig = kunit_config.parse_file(kconfig_path) + self._ops.make_arch_qemuconfig(self._kconfig) + if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir): + return True + print('Regenerating .config ...') + os.remove(kconfig_path) + return self.build_config(build_dir, make_options) + def build_kernel(self, alltests, jobs, build_dir, make_options) -> bool: try: if alltests: diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 
3355196d0515..05ff334761dd 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -12,14 +12,11 @@ from __future__ import annotations import re -from collections import namedtuple -from datetime import datetime +import datetime from enum import Enum, auto from functools import reduce from typing import Iterable, Iterator, List, Optional, Tuple -TestResult = namedtuple('TestResult', ['status','test','log']) - class Test(object): """ A class to represent a test parsed from KTAP results. All KTAP @@ -168,42 +165,51 @@ class TestCounts: class LineStream: """ A class to represent the lines of kernel output. - Provides a peek()/pop() interface over an iterator of + Provides a lazy peek()/pop() interface over an iterator of (line#, text). """ _lines: Iterator[Tuple[int, str]] _next: Tuple[int, str] + _need_next: bool _done: bool def __init__(self, lines: Iterator[Tuple[int, str]]): """Creates a new LineStream that wraps the given iterator.""" self._lines = lines self._done = False + self._need_next = True self._next = (0, '') - self._get_next() def _get_next(self) -> None: - """Advances the LineSteam to the next line.""" + """Advances the LineSteam to the next line, if necessary.""" + if not self._need_next: + return try: self._next = next(self._lines) except StopIteration: self._done = True + finally: + self._need_next = False def peek(self) -> str: """Returns the current line, without advancing the LineStream. """ + self._get_next() return self._next[1] def pop(self) -> str: """Returns the current line and advances the LineStream to the next line. """ - n = self._next - self._get_next() - return n[1] + s = self.peek() + if self._done: + raise ValueError(f'LineStream: going past EOF, last line was {s}') + self._need_next = True + return s def __bool__(self) -> bool: """Returns True if stream has more lines.""" + self._get_next() return not self._done # Only used by kunit_tool_test.py. 
@@ -216,6 +222,7 @@ class LineStream: def line_number(self) -> int: """Returns the line number of the current line.""" + self._get_next() return self._next[0] # Parsing helper methods: @@ -340,8 +347,8 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: """ Parses test plan line and stores the expected number of subtests in test object. Reports an error if expected count is 0. - Returns False and reports missing test plan error if fails to parse - test plan. + Returns False and sets expected_count to None if there is no valid test + plan. Accepted format: - '1..[number of subtests]' @@ -356,14 +363,10 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: match = TEST_PLAN.match(lines.peek()) if not match: test.expected_count = None - test.add_error('missing plan line!') return False test.log.append(lines.pop()) expected_count = int(match.group(1)) test.expected_count = expected_count - if expected_count == 0: - test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') return True TEST_RESULT = re.compile(r'^(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') @@ -514,7 +517,7 @@ ANSI_LEN = len(red('')) def print_with_timestamp(message: str) -> None: """Prints message with timestamp at beginning.""" - print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message)) + print('[%s] %s' % (datetime.datetime.now().strftime('%H:%M:%S'), message)) def format_test_divider(message: str, len_message: int) -> str: """ @@ -590,6 +593,8 @@ def format_test_result(test: Test) -> str: return (green('[PASSED] ') + test.name) elif test.status == TestStatus.SKIPPED: return (yellow('[SKIPPED] ') + test.name) + elif test.status == TestStatus.NO_TESTS: + return (yellow('[NO TESTS RUN] ') + test.name) elif test.status == TestStatus.TEST_CRASHED: print_log(test.log) return (red('[CRASHED] ') + test.name) @@ -732,6 +737,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: # test plan test.name = "main" parse_test_plan(lines, test) + 
parent_test = True else: # If KTAP/TAP header is not found, test must be subtest # header or test result line so parse attempt to parser @@ -745,7 +751,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: expected_count = test.expected_count subtests = [] test_num = 1 - while expected_count is None or test_num <= expected_count: + while parent_test and (expected_count is None or test_num <= expected_count): # Loop to parse any subtests. # Break after parsing expected number of tests or # if expected number of tests is unknown break when test @@ -780,9 +786,15 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: parse_test_result(lines, test, expected_num) else: test.add_error('missing subtest result line!') + + # Check for there being no tests + if parent_test and len(subtests) == 0: + test.status = TestStatus.NO_TESTS + test.add_error('0 tests run!') + # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) - if parent_test: + if parent_test and not main: # If test has subtests and is not the main test object, print # footer. print_test_footer(test) @@ -790,7 +802,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str]) -> Test: print_test_result(test) return test -def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: +def parse_run_tests(kernel_output: Iterable[str]) -> Test: """ Using kernel output, extract KTAP lines, parse the lines for test results and print condensed test results and summary line . @@ -799,8 +811,7 @@ def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: kernel_output - Iterable object contains lines of kernel output Return: - TestResult - Tuple containg status of main test object, main test - object with all subtests, and log of all KTAP lines. + Test - the main test object with all subtests. 
""" print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) @@ -814,4 +825,4 @@ def parse_run_tests(kernel_output: Iterable[str]) -> TestResult: test.status = test.counts.get_status() print_with_timestamp(DIVIDER) print_summary_line(test) - return TestResult(test.status, test, lines) + return test diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 9c4126731457..352369dffbd9 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -13,9 +13,10 @@ import tempfile, shutil # Handling test_tmpdir import itertools import json +import os import signal import subprocess -import os +from typing import Iterable import kunit_config import kunit_parser @@ -50,10 +51,9 @@ class KconfigTest(unittest.TestCase): self.assertFalse(kconfig1.is_subset_of(kconfig0)) def test_read_from_file(self): - kconfig = kunit_config.Kconfig() kconfig_path = test_data_path('test_read_from_file.kconfig') - kconfig.read_from_file(kconfig_path) + kconfig = kunit_config.parse_file(kconfig_path) expected_kconfig = kunit_config.Kconfig() expected_kconfig.add_entry( @@ -86,8 +86,7 @@ class KconfigTest(unittest.TestCase): expected_kconfig.write_to_file(kconfig_path) - actual_kconfig = kunit_config.Kconfig() - actual_kconfig.read_from_file(kconfig_path) + actual_kconfig = kunit_config.parse_file(kconfig_path) self.assertEqual(actual_kconfig.entries(), expected_kconfig.entries()) @@ -179,7 +178,7 @@ class KUnitParserTest(unittest.TestCase): with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.test.subtests)) + self.assertEqual(0, len(result.subtests)) self.assertEqual( kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) @@ -191,7 +190,10 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines( file.readlines())) - self.assertEqual(2, 
result.test.counts.errors) + # A missing test plan is not an error. + self.assertEqual(0, result.counts.errors) + # All tests should be accounted for. + self.assertEqual(10, result.counts.total()) self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) @@ -201,11 +203,23 @@ class KUnitParserTest(unittest.TestCase): with open(header_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - self.assertEqual(0, len(result.test.subtests)) + self.assertEqual(0, len(result.subtests)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, result.status) + def test_no_tests_no_plan(self): + no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') + with open(no_plan_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.extract_tap_lines(file.readlines())) + self.assertEqual(0, len(result.subtests[0].subtests[0].subtests)) + self.assertEqual( + kunit_parser.TestStatus.NO_TESTS, + result.subtests[0].subtests[0].status) + self.assertEqual(1, result.counts.errors) + + def test_no_kunit_output(self): crash_log = test_data_path('test_insufficient_memory.log') print_mock = mock.patch('builtins.print').start() @@ -214,7 +228,7 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.extract_tap_lines(file.readlines())) print_mock.assert_any_call(StrContains('invalid KTAP input!')) print_mock.stop() - self.assertEqual(0, len(result.test.subtests)) + self.assertEqual(0, len(result.subtests)) def test_crashed_test(self): crashed_log = test_data_path('test_is_test_passed-crash.log') @@ -255,10 +269,10 @@ class KUnitParserTest(unittest.TestCase): result.status) self.assertEqual( "sysctl_test", - result.test.subtests[0].name) + result.subtests[0].name) self.assertEqual( "example", - result.test.subtests[1].name) + result.subtests[1].name) file.close() @@ -269,7 +283,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - 
self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') @@ -278,7 +292,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') @@ -287,7 +301,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') @@ -296,7 +310,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') @@ -305,7 +319,7 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') @@ -314,7 +328,46 @@ class KUnitParserTest(unittest.TestCase): self.assertEqual( kunit_parser.TestStatus.SUCCESS, result.status) - self.assertEqual('kunit-resource-test', result.test.subtests[0].name) + self.assertEqual('kunit-resource-test', result.subtests[0].name) + +def line_stream_from_strs(strs: 
Iterable[str]) -> kunit_parser.LineStream: + return kunit_parser.LineStream(enumerate(strs, start=1)) + +class LineStreamTest(unittest.TestCase): + + def test_basic(self): + stream = line_stream_from_strs(['hello', 'world']) + + self.assertTrue(stream, msg='Should be more input') + self.assertEqual(stream.line_number(), 1) + self.assertEqual(stream.peek(), 'hello') + self.assertEqual(stream.pop(), 'hello') + + self.assertTrue(stream, msg='Should be more input') + self.assertEqual(stream.line_number(), 2) + self.assertEqual(stream.peek(), 'world') + self.assertEqual(stream.pop(), 'world') + + self.assertFalse(stream, msg='Should be no more input') + with self.assertRaisesRegex(ValueError, 'LineStream: going past EOF'): + stream.pop() + + def test_is_lazy(self): + called_times = 0 + def generator(): + nonlocal called_times + for i in range(1,5): + called_times += 1 + yield called_times, str(called_times) + + stream = kunit_parser.LineStream(generator()) + self.assertEqual(called_times, 0) + + self.assertEqual(stream.pop(), '1') + self.assertEqual(called_times, 1) + + self.assertEqual(stream.pop(), '2') + self.assertEqual(called_times, 2) class LinuxSourceTreeTest(unittest.TestCase): @@ -336,6 +389,10 @@ class LinuxSourceTreeTest(unittest.TestCase): pass kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) + def test_kconfig_add(self): + tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y']) + self.assertIn(kunit_config.KconfigEntry('NOT_REAL', 'y'), tree._kconfig.entries()) + def test_invalid_arch(self): with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'): kunit_kernel.LinuxSourceTree('', arch='invalid') @@ -356,6 +413,51 @@ class LinuxSourceTreeTest(unittest.TestCase): with open(kunit_kernel.get_outfile_path(build_dir), 'rt') as outfile: self.assertEqual(outfile.read(), 'hi\nbye\n', msg='Missing some output') + def test_build_reconfig_no_config(self): + with tempfile.TemporaryDirectory('') as 
build_dir: + with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y') + + tree = kunit_kernel.LinuxSourceTree(build_dir) + mock_build_config = mock.patch.object(tree, 'build_config').start() + + # Should generate the .config + self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) + mock_build_config.assert_called_once_with(build_dir, []) + + def test_build_reconfig_existing_config(self): + with tempfile.TemporaryDirectory('') as build_dir: + # Existing .config is a superset, should not touch it + with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y') + with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y') + with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') + + tree = kunit_kernel.LinuxSourceTree(build_dir) + mock_build_config = mock.patch.object(tree, 'build_config').start() + + self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) + self.assertEqual(mock_build_config.call_count, 0) + + def test_build_reconfig_remove_option(self): + with tempfile.TemporaryDirectory('') as build_dir: + # We removed CONFIG_KUNIT_TEST=y from our .kunitconfig... + with open(kunit_kernel.get_kunitconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y') + with open(kunit_kernel.get_old_kunitconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') + with open(kunit_kernel.get_kconfig_path(build_dir), 'w') as f: + f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') + + tree = kunit_kernel.LinuxSourceTree(build_dir) + mock_build_config = mock.patch.object(tree, 'build_config').start() + + # ... so we should trigger a call to build_config() + self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) + mock_build_config.assert_called_once_with(build_dir, []) + # TODO: add more test cases. 
@@ -365,7 +467,7 @@ class KUnitJsonTest(unittest.TestCase): with open(test_data_path(log_file)) as file: test_result = kunit_parser.parse_run_tests(file) json_obj = kunit_json.get_json_result( - test_result=test_result, + test=test_result, def_config='kunit_defconfig', build_dir=None, json_path='stdout') @@ -383,6 +485,12 @@ class KUnitJsonTest(unittest.TestCase): {'name': 'example_simple_test', 'status': 'ERROR'}, result["sub_groups"][1]["test_cases"][0]) + def test_skipped_test_json(self): + result = self._json_for('test_skip_tests.log') + self.assertEqual( + {'name': 'example_skip_test', 'status': 'SKIP'}, + result["sub_groups"][1]["test_cases"][1]) + def test_no_tests_json(self): result = self._json_for('test_is_test_passed-no_tests_run_with_header.log') self.assertEqual(0, len(result['sub_groups'])) @@ -418,8 +526,8 @@ class KUnitMainTest(unittest.TestCase): def test_build_passes_args_pass(self): kunit.main(['build'], self.linux_source_mock) - self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) - self.linux_source_mock.build_kernel.assert_called_once_with(False, 8, '.kunit', None) + self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) + self.linux_source_mock.build_kernel.assert_called_once_with(False, kunit.get_default_jobs(), '.kunit', None) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_exec_passes_args_pass(self): @@ -525,8 +633,9 @@ class KUnitMainTest(unittest.TestCase): def test_build_builddir(self): build_dir = '.kunit' + jobs = kunit.get_default_jobs() kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock) - self.linux_source_mock.build_kernel.assert_called_once_with(False, 8, build_dir, None) + self.linux_source_mock.build_kernel.assert_called_once_with(False, jobs, build_dir, None) def test_exec_builddir(self): build_dir = '.kunit' @@ -542,6 +651,7 @@ class KUnitMainTest(unittest.TestCase): # Just verify that we parsed and initialized it correctly here. 
mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig', + kconfig_add=None, arch='um', cross_compile=None, qemu_config_path=None) @@ -553,6 +663,19 @@ class KUnitMainTest(unittest.TestCase): # Just verify that we parsed and initialized it correctly here. mock_linux_init.assert_called_once_with('.kunit', kunitconfig_path='mykunitconfig', + kconfig_add=None, + arch='um', + cross_compile=None, + qemu_config_path=None) + + @mock.patch.object(kunit_kernel, 'LinuxSourceTree') + def test_run_kconfig_add(self, mock_linux_init): + mock_linux_init.return_value = self.linux_source_mock + kunit.main(['run', '--kconfig_add=CONFIG_KASAN=y', '--kconfig_add=CONFIG_KCSAN=y']) + # Just verify that we parsed and initialized it correctly here. + mock_linux_init.assert_called_once_with('.kunit', + kunitconfig_path=None, + kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'], arch='um', cross_compile=None, qemu_config_path=None) @@ -569,7 +692,7 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(300, '.kunit', False, 'suite*', None, 'suite')) + kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'suite')) self.assertEqual(got, want) # Should respect the user's filter glob when listing tests. @@ -584,7 +707,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. 
mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(300, '.kunit', False, 'suite*.test*', None, 'suite')) + kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*.test*', None, 'suite')) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300), @@ -597,7 +720,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(300, '.kunit', False, 'suite*', None, 'test')) + kunit.KunitExecRequest(None, '.kunit', None, 300, False, 'suite*', None, 'test')) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300), diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py new file mode 100755 index 000000000000..4f32133ed77c --- /dev/null +++ b/tools/testing/kunit/run_checks.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# This file runs some basic checks to verify kunit works. +# It is only of interest if you're making changes to KUnit itself. +# +# Copyright (C) 2021, Google LLC. 
+# Author: Daniel Latypov <dlatypov@google.com> + +from concurrent import futures +import datetime +import os +import shutil +import subprocess +import sys +import textwrap +from typing import Dict, List, Sequence, Tuple + +ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__)) +TIMEOUT = datetime.timedelta(minutes=5).total_seconds() + +commands: Dict[str, Sequence[str]] = { + 'kunit_tool_test.py': ['./kunit_tool_test.py'], + 'kunit smoke test': ['./kunit.py', 'run', '--kunitconfig=lib/kunit', '--build_dir=kunit_run_checks'], + 'pytype': ['/bin/sh', '-c', 'pytype *.py'], + 'mypy': ['/bin/sh', '-c', 'mypy *.py'], +} + +# The user might not have mypy or pytype installed, skip them if so. +# Note: you can install both via `$ pip install mypy pytype` +necessary_deps : Dict[str, str] = { + 'pytype': 'pytype', + 'mypy': 'mypy', +} + +def main(argv: Sequence[str]) -> None: + if argv: + raise RuntimeError('This script takes no arguments') + + future_to_name: Dict[futures.Future, str] = {} + executor = futures.ThreadPoolExecutor(max_workers=len(commands)) + for name, argv in commands.items(): + if name in necessary_deps and shutil.which(necessary_deps[name]) is None: + print(f'{name}: SKIPPED, {necessary_deps[name]} not in $PATH') + continue + f = executor.submit(run_cmd, argv) + future_to_name[f] = name + + has_failures = False + print(f'Waiting on {len(future_to_name)} checks ({", ".join(future_to_name.values())})...') + for f in futures.as_completed(future_to_name.keys()): + name = future_to_name[f] + ex = f.exception() + if not ex: + print(f'{name}: PASSED') + continue + + has_failures = True + if isinstance(ex, subprocess.TimeoutExpired): + print(f'{name}: TIMED OUT') + elif isinstance(ex, subprocess.CalledProcessError): + print(f'{name}: FAILED') + else: + print(f'{name}: unexpected exception: {ex}') + continue + + output = ex.output + if output: + print(textwrap.indent(output.decode(), '> ')) + executor.shutdown() + + if has_failures: + sys.exit(1) + + +def
run_cmd(argv: Sequence[str]): + subprocess.check_output(argv, stderr=subprocess.STDOUT, cwd=ABS_TOOL_PATH, timeout=TIMEOUT) + + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log new file mode 100644 index 000000000000..dd873c981108 --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_no_plan.log @@ -0,0 +1,7 @@ +TAP version 14 +1..1 + # Subtest: suite + 1..1 + # Subtest: case + ok 1 - case # SKIP +ok 1 - suite diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 47f9cc9dcd94..c57d9e9d4480 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -35,8 +35,6 @@ obj-$(CONFIG_DAX) += dax.o endif obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o -obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem_core.o -obj-$(CONFIG_DEV_DAX_PMEM_COMPAT) += dax_pmem_compat.o nfit-y := $(ACPI_SRC)/core.o nfit-y += $(ACPI_SRC)/intel.o @@ -67,12 +65,8 @@ device_dax-y += dax-dev.o device_dax-y += device_dax_test.o device_dax-y += config_check.o -dax_pmem-y := $(DAX_SRC)/pmem/pmem.o +dax_pmem-y := $(DAX_SRC)/pmem.o dax_pmem-y += dax_pmem_test.o -dax_pmem_core-y := $(DAX_SRC)/pmem/core.o -dax_pmem_core-y += dax_pmem_core_test.o -dax_pmem_compat-y := $(DAX_SRC)/pmem/compat.o -dax_pmem_compat-y += dax_pmem_compat_test.o dax_pmem-y += config_check.o libnvdimm-y := $(NVDIMM_SRC)/core.o diff --git a/tools/testing/nvdimm/dax_pmem_compat_test.c b/tools/testing/nvdimm/dax_pmem_compat_test.c deleted file mode 100644 index 7cd1877f3765..000000000000 --- a/tools/testing/nvdimm/dax_pmem_compat_test.c +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright(c) 2019 Intel Corporation. All rights reserved. 
- -#include <linux/module.h> -#include <linux/printk.h> -#include "watermark.h" - -nfit_test_watermark(dax_pmem_compat); diff --git a/tools/testing/nvdimm/dax_pmem_core_test.c b/tools/testing/nvdimm/dax_pmem_core_test.c deleted file mode 100644 index a4249cdbeec1..000000000000 --- a/tools/testing/nvdimm/dax_pmem_core_test.c +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright(c) 2019 Intel Corporation. All rights reserved. - -#include <linux/module.h> -#include <linux/printk.h> -#include "watermark.h" - -nfit_test_watermark(dax_pmem_core); diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index ed563bdd88f3..b752ce47ead3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -100,25 +100,17 @@ static void nfit_test_kill(void *_pgmap) { struct dev_pagemap *pgmap = _pgmap; - WARN_ON(!pgmap || !pgmap->ref); - - if (pgmap->ops && pgmap->ops->kill) - pgmap->ops->kill(pgmap); - else - percpu_ref_kill(pgmap->ref); - - if (pgmap->ops && pgmap->ops->cleanup) { - pgmap->ops->cleanup(pgmap); - } else { - wait_for_completion(&pgmap->done); - percpu_ref_exit(pgmap->ref); - } + WARN_ON(!pgmap); + + percpu_ref_kill(&pgmap->ref); + + wait_for_completion(&pgmap->done); + percpu_ref_exit(&pgmap->ref); } static void dev_pagemap_percpu_release(struct percpu_ref *ref) { - struct dev_pagemap *pgmap = - container_of(ref, struct dev_pagemap, internal_ref); + struct dev_pagemap *pgmap = container_of(ref, struct dev_pagemap, ref); complete(&pgmap->done); } @@ -132,22 +124,11 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) if (!nfit_res) return devm_memremap_pages(dev, pgmap); - if (!pgmap->ref) { - if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup)) - return ERR_PTR(-EINVAL); - - init_completion(&pgmap->done); - error = percpu_ref_init(&pgmap->internal_ref, - dev_pagemap_percpu_release, 0, GFP_KERNEL); - if (error) - return ERR_PTR(error); - pgmap->ref 
= &pgmap->internal_ref; - } else { - if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) { - WARN(1, "Missing reference count teardown definition\n"); - return ERR_PTR(-EINVAL); - } - } + init_completion(&pgmap->done); + error = percpu_ref_init(&pgmap->ref, dev_pagemap_percpu_release, 0, + GFP_KERNEL); + if (error) + return ERR_PTR(error); error = devm_add_action_or_reset(dev, nfit_test_kill, pgmap); if (error) diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index 6862915f1fb0..3ca7c32e9362 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -1054,10 +1054,6 @@ static __init int ndtest_init(void) libnvdimm_test(); device_dax_test(); dax_pmem_test(); - dax_pmem_core_test(); -#ifdef CONFIG_DEV_DAX_PMEM_COMPAT - dax_pmem_compat_test(); -#endif nfit_test_setup(ndtest_resource_lookup, NULL); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index b1bff5fb0f65..0bc91ffee257 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3300,10 +3300,6 @@ static __init int nfit_test_init(void) acpi_nfit_test(); device_dax_test(); dax_pmem_test(); - dax_pmem_core_test(); -#ifdef CONFIG_DEV_DAX_PMEM_COMPAT - dax_pmem_compat_test(); -#endif nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c852eb40c4f7..d08fe4cfe811 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TARGETS = arm64 +TARGETS += alsa +TARGETS += arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore new file mode 100644 index 000000000000..3bb7c41266a8 --- /dev/null +++ b/tools/testing/selftests/alsa/.gitignore @@ -0,0 +1 @@ +mixer-test diff --git 
a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile new file mode 100644 index 000000000000..f64d9090426d --- /dev/null +++ b/tools/testing/selftests/alsa/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# + +CFLAGS += $(shell pkg-config --cflags alsa) +LDLIBS += $(shell pkg-config --libs alsa) + +TEST_GEN_PROGS := mixer-test + +include ../lib.mk diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c new file mode 100644 index 000000000000..17f158d7a767 --- /dev/null +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -0,0 +1,705 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// kselftest for the ALSA mixer API +// +// Original author: Mark Brown <broonie@kernel.org> +// Copyright (c) 2021 Arm Limited + +// This test will iterate over all cards detected in the system, exercising +// every mixer control it can find. This may conflict with other system +// software if there is audio activity so is best run on a system with a +// minimal active userspace. 
+ +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <getopt.h> +#include <stdarg.h> +#include <ctype.h> +#include <math.h> +#include <errno.h> +#include <assert.h> +#include <alsa/asoundlib.h> +#include <poll.h> +#include <stdint.h> + +#include "../kselftest.h" + +#define TESTS_PER_CONTROL 3 + +struct card_data { + snd_ctl_t *handle; + int card; + int num_ctls; + snd_ctl_elem_list_t *ctls; + struct card_data *next; +}; + +struct ctl_data { + const char *name; + snd_ctl_elem_id_t *id; + snd_ctl_elem_info_t *info; + snd_ctl_elem_value_t *def_val; + int elem; + struct card_data *card; + struct ctl_data *next; +}; + +static const char *alsa_config = +"ctl.hw {\n" +" @args [ CARD ]\n" +" @args.CARD.type string\n" +" type hw\n" +" card $CARD\n" +"}\n" +; + +int num_cards = 0; +int num_controls = 0; +struct card_data *card_list = NULL; +struct ctl_data *ctl_list = NULL; + +#ifdef SND_LIB_VER +#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6) +#define LIB_HAS_LOAD_STRING +#endif +#endif + +#ifndef LIB_HAS_LOAD_STRING +int snd_config_load_string(snd_config_t **config, const char *s, size_t size) +{ + snd_input_t *input; + snd_config_t *dst; + int err; + + assert(config && s); + if (size == 0) + size = strlen(s); + err = snd_input_buffer_open(&input, s, size); + if (err < 0) + return err; + err = snd_config_top(&dst); + if (err < 0) { + snd_input_close(input); + return err; + } + err = snd_config_load(dst, input); + snd_input_close(input); + if (err < 0) { + snd_config_delete(dst); + return err; + } + *config = dst; + return 0; +} +#endif + +void find_controls(void) +{ + char name[32]; + int card, ctl, err; + struct card_data *card_data; + struct ctl_data *ctl_data; + snd_config_t *config; + + card = -1; + if (snd_card_next(&card) < 0 || card < 0) + return; + + err = snd_config_load_string(&config, alsa_config, strlen(alsa_config)); + if (err < 0) { + ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n", + 
snd_strerror(err)); + ksft_exit_fail(); + } + + while (card >= 0) { + sprintf(name, "hw:%d", card); + + card_data = malloc(sizeof(*card_data)); + if (!card_data) + ksft_exit_fail_msg("Out of memory\n"); + + err = snd_ctl_open_lconf(&card_data->handle, name, 0, config); + if (err < 0) { + ksft_print_msg("Failed to get hctl for card %d: %s\n", + card, snd_strerror(err)); + goto next_card; + } + + /* Count controls */ + snd_ctl_elem_list_malloc(&card_data->ctls); + snd_ctl_elem_list(card_data->handle, card_data->ctls); + card_data->num_ctls = snd_ctl_elem_list_get_count(card_data->ctls); + + /* Enumerate control information */ + snd_ctl_elem_list_alloc_space(card_data->ctls, card_data->num_ctls); + snd_ctl_elem_list(card_data->handle, card_data->ctls); + + card_data->card = num_cards++; + card_data->next = card_list; + card_list = card_data; + + num_controls += card_data->num_ctls; + + for (ctl = 0; ctl < card_data->num_ctls; ctl++) { + ctl_data = malloc(sizeof(*ctl_data)); + if (!ctl_data) + ksft_exit_fail_msg("Out of memory\n"); + + ctl_data->card = card_data; + ctl_data->elem = ctl; + ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls, + ctl); + + err = snd_ctl_elem_id_malloc(&ctl_data->id); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + + err = snd_ctl_elem_info_malloc(&ctl_data->info); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + + err = snd_ctl_elem_value_malloc(&ctl_data->def_val); + if (err < 0) + ksft_exit_fail_msg("Out of memory\n"); + + snd_ctl_elem_list_get_id(card_data->ctls, ctl, + ctl_data->id); + snd_ctl_elem_info_set_id(ctl_data->info, ctl_data->id); + err = snd_ctl_elem_info(card_data->handle, + ctl_data->info); + if (err < 0) { + ksft_print_msg("%s getting info for %s\n", + snd_strerror(err), + ctl_data->name); + } + + snd_ctl_elem_value_set_id(ctl_data->def_val, + ctl_data->id); + + ctl_data->next = ctl_list; + ctl_list = ctl_data; + } + + next_card: + if (snd_card_next(&card) < 0) { +
ksft_print_msg("snd_card_next\n"); + break; + } + } + + snd_config_delete(config); +} + +bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val, + int index) +{ + long int_val; + long long int64_val; + + switch (snd_ctl_elem_info_get_type(ctl->info)) { + case SND_CTL_ELEM_TYPE_NONE: + ksft_print_msg("%s.%d Invalid control type NONE\n", + ctl->name, index); + return false; + + case SND_CTL_ELEM_TYPE_BOOLEAN: + int_val = snd_ctl_elem_value_get_boolean(val, index); + switch (int_val) { + case 0: + case 1: + break; + default: + ksft_print_msg("%s.%d Invalid boolean value %ld\n", + ctl->name, index, int_val); + return false; + } + break; + + case SND_CTL_ELEM_TYPE_INTEGER: + int_val = snd_ctl_elem_value_get_integer(val, index); + + if (int_val < snd_ctl_elem_info_get_min(ctl->info)) { + ksft_print_msg("%s.%d value %ld less than minimum %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_min(ctl->info)); + return false; + } + + if (int_val > snd_ctl_elem_info_get_max(ctl->info)) { + ksft_print_msg("%s.%d value %ld more than maximum %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_max(ctl->info)); + return false; + } + + /* Only check step size if there is one and we're in bounds */ + if (snd_ctl_elem_info_get_step(ctl->info) && + (int_val - snd_ctl_elem_info_get_min(ctl->info)) % + snd_ctl_elem_info_get_step(ctl->info)) { + ksft_print_msg("%s.%d value %ld invalid for step %ld minimum %ld\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_step(ctl->info), + snd_ctl_elem_info_get_min(ctl->info)); + return false; + } + break; + + case SND_CTL_ELEM_TYPE_INTEGER64: + int64_val = snd_ctl_elem_value_get_integer64(val, index); + + if (int64_val < snd_ctl_elem_info_get_min64(ctl->info)) { + ksft_print_msg("%s.%d value %lld less than minimum %lld\n", + ctl->name, index, int64_val, + snd_ctl_elem_info_get_min64(ctl->info)); + return false; + } + + if (int64_val > snd_ctl_elem_info_get_max64(ctl->info)) { + ksft_print_msg("%s.%d value
%lld more than maximum %lld\n", + ctl->name, index, int64_val, + snd_ctl_elem_info_get_max64(ctl->info)); + return false; + } + + /* Only check step size if there is one and we're in bounds */ + if (snd_ctl_elem_info_get_step64(ctl->info) && + (int64_val - snd_ctl_elem_info_get_min64(ctl->info)) % + snd_ctl_elem_info_get_step64(ctl->info)) { + ksft_print_msg("%s.%d value %lld invalid for step %lld minimum %lld\n", + ctl->name, index, int64_val, + snd_ctl_elem_info_get_step64(ctl->info), + snd_ctl_elem_info_get_min64(ctl->info)); + return false; + } + break; + + case SND_CTL_ELEM_TYPE_ENUMERATED: + int_val = snd_ctl_elem_value_get_enumerated(val, index); + + if (int_val < 0) { + ksft_print_msg("%s.%d negative value %ld for enumeration\n", + ctl->name, index, int_val); + return false; + } + + if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) { + ksft_print_msg("%s.%d value %ld more than item count %u\n", + ctl->name, index, int_val, + snd_ctl_elem_info_get_items(ctl->info)); + return false; + } + break; + + default: + /* No tests for other types */ + break; + } + + return true; +} + +/* + * Check that the provided value meets the constraints for the + * provided control. + */ +bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val) +{ + int i; + bool valid = true; + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) + if (!ctl_value_index_valid(ctl, val, i)) + valid = false; + + return valid; +} + +/* + * Check that we can read the default value and it is valid. Write + * tests use the read value to restore the default.
+ */ +void test_ctl_get_value(struct ctl_data *ctl) +{ + int err; + + /* If the control is turned off let's be polite */ + if (snd_ctl_elem_info_is_inactive(ctl->info)) { + ksft_print_msg("%s is inactive\n", ctl->name); + ksft_test_result_skip("get_value.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + /* Can't test reading on an unreadable control */ + if (!snd_ctl_elem_info_is_readable(ctl->info)) { + ksft_print_msg("%s is not readable\n", ctl->name); + ksft_test_result_skip("get_value.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + err = snd_ctl_elem_read(ctl->card->handle, ctl->def_val); + if (err < 0) { + ksft_print_msg("snd_ctl_elem_read() failed: %s\n", + snd_strerror(err)); + goto out; + } + + if (!ctl_value_valid(ctl, ctl->def_val)) + err = -EINVAL; + +out: + ksft_test_result(err >= 0, "get_value.%d.%d\n", + ctl->card->card, ctl->elem); +} + +bool show_mismatch(struct ctl_data *ctl, int index, + snd_ctl_elem_value_t *read_val, + snd_ctl_elem_value_t *expected_val) +{ + long long expected_int, read_int; + + /* + * We factor out the code to compare values representable as + * integers, ensure that check doesn't log otherwise. 
+ */ + expected_int = 0; + read_int = 0; + + switch (snd_ctl_elem_info_get_type(ctl->info)) { + case SND_CTL_ELEM_TYPE_BOOLEAN: + expected_int = snd_ctl_elem_value_get_boolean(expected_val, + index); + read_int = snd_ctl_elem_value_get_boolean(read_val, index); + break; + + case SND_CTL_ELEM_TYPE_INTEGER: + expected_int = snd_ctl_elem_value_get_integer(expected_val, + index); + read_int = snd_ctl_elem_value_get_integer(read_val, index); + break; + + case SND_CTL_ELEM_TYPE_INTEGER64: + expected_int = snd_ctl_elem_value_get_integer64(expected_val, + index); + read_int = snd_ctl_elem_value_get_integer64(read_val, + index); + break; + + case SND_CTL_ELEM_TYPE_ENUMERATED: + expected_int = snd_ctl_elem_value_get_enumerated(expected_val, + index); + read_int = snd_ctl_elem_value_get_enumerated(read_val, + index); + break; + + default: + break; + } + + if (expected_int != read_int) { + /* + * NOTE: The volatile attribute means that the hardware + * can voluntarily change the state of control element + * independent of any operation by software. + */ + bool is_volatile = snd_ctl_elem_info_is_volatile(ctl->info); + ksft_print_msg("%s.%d expected %lld but read %lld, is_volatile %d\n", + ctl->name, index, expected_int, read_int, is_volatile); + return !is_volatile; + } else { + return false; + } +} + +/* + * Write a value then if possible verify that we get the expected + * result. An optional expected value can be provided if we expect + * the write to fail, for verifying that invalid writes don't corrupt + * anything. 
+ */ +int write_and_verify(struct ctl_data *ctl, + snd_ctl_elem_value_t *write_val, + snd_ctl_elem_value_t *expected_val) +{ + int err, i; + bool error_expected, mismatch_shown; + snd_ctl_elem_value_t *read_val, *w_val; + snd_ctl_elem_value_alloca(&read_val); + snd_ctl_elem_value_alloca(&w_val); + + /* + * We need to copy the write value since writing can modify + * the value which causes surprises, and allocate an expected + * value if we expect to read back what we wrote. + */ + snd_ctl_elem_value_copy(w_val, write_val); + if (expected_val) { + error_expected = true; + } else { + error_expected = false; + snd_ctl_elem_value_alloca(&expected_val); + snd_ctl_elem_value_copy(expected_val, write_val); + } + + /* + * Do the write, if we have an expected value ignore the error + * and carry on to validate the expected value. + */ + err = snd_ctl_elem_write(ctl->card->handle, w_val); + if (err < 0 && !error_expected) { + ksft_print_msg("snd_ctl_elem_write() failed: %s\n", + snd_strerror(err)); + return err; + } + + /* Can we do the verification part? */ + if (!snd_ctl_elem_info_is_readable(ctl->info)) + return err; + + snd_ctl_elem_value_set_id(read_val, ctl->id); + + err = snd_ctl_elem_read(ctl->card->handle, read_val); + if (err < 0) { + ksft_print_msg("snd_ctl_elem_read() failed: %s\n", + snd_strerror(err)); + return err; + } + + /* + * Use the library to compare values, if there's a mismatch + * carry on and try to provide a more useful diagnostic than + * just "mismatch". + */ + if (!snd_ctl_elem_value_compare(expected_val, read_val)) + return 0; + + mismatch_shown = false; + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) + if (show_mismatch(ctl, i, read_val, expected_val)) + mismatch_shown = true; + + if (!mismatch_shown) + ksft_print_msg("%s read and written values differ\n", + ctl->name); + + return -1; +} + +/* + * Make sure we can write the default value back to the control, this + * should validate that at least some write works.
+ */ +void test_ctl_write_default(struct ctl_data *ctl) +{ + int err; + + /* If the control is turned off let's be polite */ + if (snd_ctl_elem_info_is_inactive(ctl->info)) { + ksft_print_msg("%s is inactive\n", ctl->name); + ksft_test_result_skip("write_default.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + if (!snd_ctl_elem_info_is_writable(ctl->info)) { + ksft_print_msg("%s is not writeable\n", ctl->name); + ksft_test_result_skip("write_default.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + /* No idea what the default was for unreadable controls */ + if (!snd_ctl_elem_info_is_readable(ctl->info)) { + ksft_print_msg("%s couldn't read default\n", ctl->name); + ksft_test_result_skip("write_default.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + err = write_and_verify(ctl, ctl->def_val, NULL); + + ksft_test_result(err >= 0, "write_default.%d.%d\n", + ctl->card->card, ctl->elem); +} + +bool test_ctl_write_valid_boolean(struct ctl_data *ctl) +{ + int err, i, j; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = 0; j < 2; j++) { + snd_ctl_elem_value_set_boolean(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + return !fail; +} + +bool test_ctl_write_valid_integer(struct ctl_data *ctl) +{ + int err; + int i; + long j, step; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + step = snd_ctl_elem_info_get_step(ctl->info); + if (!step) + step = 1; + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = snd_ctl_elem_info_get_min(ctl->info); + j <= snd_ctl_elem_info_get_max(ctl->info); j += step) { + + snd_ctl_elem_value_set_integer(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + + return 
!fail; +} + +bool test_ctl_write_valid_integer64(struct ctl_data *ctl) +{ + int err, i; + long long j, step; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + step = snd_ctl_elem_info_get_step64(ctl->info); + if (!step) + step = 1; + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = snd_ctl_elem_info_get_min64(ctl->info); + j <= snd_ctl_elem_info_get_max64(ctl->info); j += step) { + + snd_ctl_elem_value_set_integer64(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + return !fail; +} + +bool test_ctl_write_valid_enumerated(struct ctl_data *ctl) +{ + int err, i, j; + bool fail = false; + snd_ctl_elem_value_t *val; + snd_ctl_elem_value_alloca(&val); + + snd_ctl_elem_value_set_id(val, ctl->id); + + for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) { + for (j = 0; j < snd_ctl_elem_info_get_items(ctl->info); j++) { + snd_ctl_elem_value_set_enumerated(val, i, j); + err = write_and_verify(ctl, val, NULL); + if (err != 0) + fail = true; + } + } + + return !fail; +} + +void test_ctl_write_valid(struct ctl_data *ctl) +{ + bool pass; + int err; + + /* If the control is turned off let's be polite */ + if (snd_ctl_elem_info_is_inactive(ctl->info)) { + ksft_print_msg("%s is inactive\n", ctl->name); + ksft_test_result_skip("write_valid.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + if (!snd_ctl_elem_info_is_writable(ctl->info)) { + ksft_print_msg("%s is not writeable\n", ctl->name); + ksft_test_result_skip("write_valid.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + switch (snd_ctl_elem_info_get_type(ctl->info)) { + case SND_CTL_ELEM_TYPE_BOOLEAN: + pass = test_ctl_write_valid_boolean(ctl); + break; + + case SND_CTL_ELEM_TYPE_INTEGER: + pass = test_ctl_write_valid_integer(ctl); + break; + + case SND_CTL_ELEM_TYPE_INTEGER64: + pass = test_ctl_write_valid_integer64(ctl); + break; + + case 
SND_CTL_ELEM_TYPE_ENUMERATED: + pass = test_ctl_write_valid_enumerated(ctl); + break; + + default: + /* No tests for this yet */ + ksft_test_result_skip("write_valid.%d.%d\n", + ctl->card->card, ctl->elem); + return; + } + + /* Restore the default value to minimise disruption */ + err = write_and_verify(ctl, ctl->def_val, NULL); + if (err < 0) + pass = false; + + ksft_test_result(pass, "write_valid.%d.%d\n", + ctl->card->card, ctl->elem); +} + +int main(void) +{ + struct ctl_data *ctl; + + ksft_print_header(); + + find_controls(); + + ksft_set_plan(num_controls * TESTS_PER_CONTROL); + + for (ctl = ctl_list; ctl != NULL; ctl = ctl->next) { + /* + * Must test get_value() before we write anything, the + * test stores the default value for later cleanup. + */ + test_ctl_get_value(ctl); + test_ctl_write_default(ctl); + test_ctl_write_valid(ctl); + } + + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index ced910fb4019..1e8d9a8f59df 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte bti +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore new file mode 100644 index 000000000000..b79cf5814c23 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/.gitignore @@ -0,0 +1 @@ +syscall-abi diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile new file mode 100644 index 000000000000..96eba974ac8d --- /dev/null +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited + +TEST_GEN_PROGS := syscall-abi + +include ../../lib.mk + +$(OUTPUT)/syscall-abi: 
syscall-abi.c syscall-abi-asm.S diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S new file mode 100644 index 000000000000..983467cfcee0 --- /dev/null +++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// +// Assembly portion of the syscall ABI test + +// +// Load values from memory into registers, invoke a syscall and save the +// register values back to memory for later checking. The syscall to be +// invoked is configured in x8 of the input GPR data. +// +// x0: SVE VL, 0 for FP only +// +// GPRs: gpr_in, gpr_out +// FPRs: fpr_in, fpr_out +// Zn: z_in, z_out +// Pn: p_in, p_out +// FFR: ffr_in, ffr_out + +.arch_extension sve + +.globl do_syscall +do_syscall: + // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1 + stp x29, x30, [sp, #-112]! + mov x29, sp + stp x0, x1, [sp, #16] + stp x19, x20, [sp, #32] + stp x21, x22, [sp, #48] + stp x23, x24, [sp, #64] + stp x25, x26, [sp, #80] + stp x27, x28, [sp, #96] + + // Load GPRs x8-x28, and save our SP/FP for later comparison + ldr x2, =gpr_in + add x2, x2, #64 + ldp x8, x9, [x2], #16 + ldp x10, x11, [x2], #16 + ldp x12, x13, [x2], #16 + ldp x14, x15, [x2], #16 + ldp x16, x17, [x2], #16 + ldp x18, x19, [x2], #16 + ldp x20, x21, [x2], #16 + ldp x22, x23, [x2], #16 + ldp x24, x25, [x2], #16 + ldp x26, x27, [x2], #16 + ldr x28, [x2], #8 + str x29, [x2], #8 // FP + str x30, [x2], #8 // LR + + // Load FPRs if we're not doing SVE + cbnz x0, 1f + ldr x2, =fpr_in + ldp q0, q1, [x2] + ldp q2, q3, [x2, #16 * 2] + ldp q4, q5, [x2, #16 * 4] + ldp q6, q7, [x2, #16 * 6] + ldp q8, q9, [x2, #16 * 8] + ldp q10, q11, [x2, #16 * 10] + ldp q12, q13, [x2, #16 * 12] + ldp q14, q15, [x2, #16 * 14] + ldp q16, q17, [x2, #16 * 16] + ldp q18, q19, [x2, #16 * 18] + ldp q20, q21, [x2, #16 * 20] + ldp q22, q23, [x2, #16 * 22] + ldp q24, q25, [x2, #16 * 24] + ldp q26, 
q27, [x2, #16 * 26] + ldp q28, q29, [x2, #16 * 28] + ldp q30, q31, [x2, #16 * 30] +1: + + // Load the SVE registers if we're doing SVE + cbz x0, 1f + + ldr x2, =z_in + ldr z0, [x2, #0, MUL VL] + ldr z1, [x2, #1, MUL VL] + ldr z2, [x2, #2, MUL VL] + ldr z3, [x2, #3, MUL VL] + ldr z4, [x2, #4, MUL VL] + ldr z5, [x2, #5, MUL VL] + ldr z6, [x2, #6, MUL VL] + ldr z7, [x2, #7, MUL VL] + ldr z8, [x2, #8, MUL VL] + ldr z9, [x2, #9, MUL VL] + ldr z10, [x2, #10, MUL VL] + ldr z11, [x2, #11, MUL VL] + ldr z12, [x2, #12, MUL VL] + ldr z13, [x2, #13, MUL VL] + ldr z14, [x2, #14, MUL VL] + ldr z15, [x2, #15, MUL VL] + ldr z16, [x2, #16, MUL VL] + ldr z17, [x2, #17, MUL VL] + ldr z18, [x2, #18, MUL VL] + ldr z19, [x2, #19, MUL VL] + ldr z20, [x2, #20, MUL VL] + ldr z21, [x2, #21, MUL VL] + ldr z22, [x2, #22, MUL VL] + ldr z23, [x2, #23, MUL VL] + ldr z24, [x2, #24, MUL VL] + ldr z25, [x2, #25, MUL VL] + ldr z26, [x2, #26, MUL VL] + ldr z27, [x2, #27, MUL VL] + ldr z28, [x2, #28, MUL VL] + ldr z29, [x2, #29, MUL VL] + ldr z30, [x2, #30, MUL VL] + ldr z31, [x2, #31, MUL VL] + + ldr x2, =ffr_in + ldr p0, [x2, #0] + wrffr p0.b + + ldr x2, =p_in + ldr p0, [x2, #0, MUL VL] + ldr p1, [x2, #1, MUL VL] + ldr p2, [x2, #2, MUL VL] + ldr p3, [x2, #3, MUL VL] + ldr p4, [x2, #4, MUL VL] + ldr p5, [x2, #5, MUL VL] + ldr p6, [x2, #6, MUL VL] + ldr p7, [x2, #7, MUL VL] + ldr p8, [x2, #8, MUL VL] + ldr p9, [x2, #9, MUL VL] + ldr p10, [x2, #10, MUL VL] + ldr p11, [x2, #11, MUL VL] + ldr p12, [x2, #12, MUL VL] + ldr p13, [x2, #13, MUL VL] + ldr p14, [x2, #14, MUL VL] + ldr p15, [x2, #15, MUL VL] +1: + + // Do the syscall + svc #0 + + // Save GPRs x8-x30 + ldr x2, =gpr_out + add x2, x2, #64 + stp x8, x9, [x2], #16 + stp x10, x11, [x2], #16 + stp x12, x13, [x2], #16 + stp x14, x15, [x2], #16 + stp x16, x17, [x2], #16 + stp x18, x19, [x2], #16 + stp x20, x21, [x2], #16 + stp x22, x23, [x2], #16 + stp x24, x25, [x2], #16 + stp x26, x27, [x2], #16 + stp x28, x29, [x2], #16 + str x30, [x2] + + // Restore 
x0 and x1 for feature checks + ldp x0, x1, [sp, #16] + + // Save FPSIMD state + ldr x2, =fpr_out + stp q0, q1, [x2] + stp q2, q3, [x2, #16 * 2] + stp q4, q5, [x2, #16 * 4] + stp q6, q7, [x2, #16 * 6] + stp q8, q9, [x2, #16 * 8] + stp q10, q11, [x2, #16 * 10] + stp q12, q13, [x2, #16 * 12] + stp q14, q15, [x2, #16 * 14] + stp q16, q17, [x2, #16 * 16] + stp q18, q19, [x2, #16 * 18] + stp q20, q21, [x2, #16 * 20] + stp q22, q23, [x2, #16 * 22] + stp q24, q25, [x2, #16 * 24] + stp q26, q27, [x2, #16 * 26] + stp q28, q29, [x2, #16 * 28] + stp q30, q31, [x2, #16 * 30] + + // Save the SVE state if we have some + cbz x0, 1f + + ldr x2, =z_out + str z0, [x2, #0, MUL VL] + str z1, [x2, #1, MUL VL] + str z2, [x2, #2, MUL VL] + str z3, [x2, #3, MUL VL] + str z4, [x2, #4, MUL VL] + str z5, [x2, #5, MUL VL] + str z6, [x2, #6, MUL VL] + str z7, [x2, #7, MUL VL] + str z8, [x2, #8, MUL VL] + str z9, [x2, #9, MUL VL] + str z10, [x2, #10, MUL VL] + str z11, [x2, #11, MUL VL] + str z12, [x2, #12, MUL VL] + str z13, [x2, #13, MUL VL] + str z14, [x2, #14, MUL VL] + str z15, [x2, #15, MUL VL] + str z16, [x2, #16, MUL VL] + str z17, [x2, #17, MUL VL] + str z18, [x2, #18, MUL VL] + str z19, [x2, #19, MUL VL] + str z20, [x2, #20, MUL VL] + str z21, [x2, #21, MUL VL] + str z22, [x2, #22, MUL VL] + str z23, [x2, #23, MUL VL] + str z24, [x2, #24, MUL VL] + str z25, [x2, #25, MUL VL] + str z26, [x2, #26, MUL VL] + str z27, [x2, #27, MUL VL] + str z28, [x2, #28, MUL VL] + str z29, [x2, #29, MUL VL] + str z30, [x2, #30, MUL VL] + str z31, [x2, #31, MUL VL] + + ldr x2, =p_out + str p0, [x2, #0, MUL VL] + str p1, [x2, #1, MUL VL] + str p2, [x2, #2, MUL VL] + str p3, [x2, #3, MUL VL] + str p4, [x2, #4, MUL VL] + str p5, [x2, #5, MUL VL] + str p6, [x2, #6, MUL VL] + str p7, [x2, #7, MUL VL] + str p8, [x2, #8, MUL VL] + str p9, [x2, #9, MUL VL] + str p10, [x2, #10, MUL VL] + str p11, [x2, #11, MUL VL] + str p12, [x2, #12, MUL VL] + str p13, [x2, #13, MUL VL] + str p14, [x2, #14, MUL VL] + str p15, 
[x2, #15, MUL VL] + + ldr x2, =ffr_out + rdffr p0.b + str p0, [x2, #0] +1: + + // Restore callee saved registers x19-x30 + ldp x19, x20, [sp, #32] + ldp x21, x22, [sp, #48] + ldp x23, x24, [sp, #64] + ldp x25, x26, [sp, #80] + ldp x27, x28, [sp, #96] + ldp x29, x30, [sp], #112 + + ret diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c new file mode 100644 index 000000000000..d8eeeafb50dc --- /dev/null +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + */ + +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/hwcap.h> +#include <asm/sigcontext.h> +#include <asm/unistd.h> + +#include "../../kselftest.h" + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#define NUM_VL ((SVE_VQ_MAX - SVE_VQ_MIN) + 1) + +extern void do_syscall(int sve_vl); + +static void fill_random(void *buf, size_t size) +{ + int i; + uint32_t *lbuf = buf; + + /* random() returns a 32 bit number regardless of the size of long */ + for (i = 0; i < size / sizeof(uint32_t); i++) + lbuf[i] = random(); +} + +/* + * We also repeat the test for several syscalls to try to expose different + * behaviour. + */ +static struct syscall_cfg { + int syscall_nr; + const char *name; +} syscalls[] = { + { __NR_getpid, "getpid()" }, + { __NR_sched_yield, "sched_yield()" }, +}; + +#define NUM_GPR 31 +uint64_t gpr_in[NUM_GPR]; +uint64_t gpr_out[NUM_GPR]; + +static void setup_gpr(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(gpr_in, sizeof(gpr_in)); + gpr_in[8] = cfg->syscall_nr; + memset(gpr_out, 0, sizeof(gpr_out)); +} + +static int check_gpr(struct syscall_cfg *cfg, int sve_vl) +{ + int errors = 0; + int i; + + /* + * GPR x0-x7 may be clobbered, and all others should be preserved. 
+ */ + for (i = 9; i < ARRAY_SIZE(gpr_in); i++) { + if (gpr_in[i] != gpr_out[i]) { + ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n", + cfg->name, sve_vl, i, + gpr_in[i], gpr_out[i]); + errors++; + } + } + + return errors; +} + +#define NUM_FPR 32 +uint64_t fpr_in[NUM_FPR * 2]; +uint64_t fpr_out[NUM_FPR * 2]; + +static void setup_fpr(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(fpr_in, sizeof(fpr_in)); + memset(fpr_out, 0, sizeof(fpr_out)); +} + +static int check_fpr(struct syscall_cfg *cfg, int sve_vl) +{ + int errors = 0; + int i; + + if (!sve_vl) { + for (i = 0; i < ARRAY_SIZE(fpr_in); i++) { + if (fpr_in[i] != fpr_out[i]) { + ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n", + cfg->name, + i / 2, i % 2, + fpr_in[i], fpr_out[i]); + errors++; + } + } + } + + return errors; +} + +static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; + +static void setup_z(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(z_in, sizeof(z_in)); + fill_random(z_out, sizeof(z_out)); +} + +static int check_z(struct syscall_cfg *cfg, int sve_vl) +{ + size_t reg_size = sve_vl; + int errors = 0; + int i; + + if (!sve_vl) + return 0; + + /* + * After a syscall the low 128 bits of the Z registers should + * be preserved and the rest be zeroed or preserved.
+ */ + for (i = 0; i < SVE_NUM_ZREGS; i++) { + void *in = &z_in[reg_size * i]; + void *out = &z_out[reg_size * i]; + + if (memcmp(in, out, SVE_VQ_BYTES) != 0) { + ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n", + cfg->name, sve_vl, i); + errors++; + } + } + + return errors; +} + +uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; + +static void setup_p(struct syscall_cfg *cfg, int sve_vl) +{ + fill_random(p_in, sizeof(p_in)); + fill_random(p_out, sizeof(p_out)); +} + +static int check_p(struct syscall_cfg *cfg, int sve_vl) +{ + size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ + + int errors = 0; + int i; + + if (!sve_vl) + return 0; + + /* After a syscall the P registers should be preserved or zeroed */ + for (i = 0; i < SVE_NUM_PREGS * reg_size; i++) + if (p_out[i] && (p_in[i] != p_out[i])) + errors++; + if (errors) + ksft_print_msg("%s SVE VL %d predicate registers non-zero\n", + cfg->name, sve_vl); + + return errors; +} + +uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; + +static void setup_ffr(struct syscall_cfg *cfg, int sve_vl) +{ + /* + * It is only valid to set a contiguous set of bits starting + * at 0. For now since we're expecting this to be cleared by + * a syscall just set all bits. 
+ */ + memset(ffr_in, 0xff, sizeof(ffr_in)); + fill_random(ffr_out, sizeof(ffr_out)); +} + +static int check_ffr(struct syscall_cfg *cfg, int sve_vl) +{ + size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */ + int errors = 0; + int i; + + if (!sve_vl) + return 0; + + /* After a syscall the FFR should be preserved or zeroed */ + for (i = 0; i < reg_size; i++) + if (ffr_out[i] && (ffr_in[i] != ffr_out[i])) + errors++; + if (errors) + ksft_print_msg("%s SVE VL %d FFR non-zero\n", + cfg->name, sve_vl); + + return errors; +} + +typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl); +typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl); + +/* + * Each set of registers has a setup function which is called before + * the syscall to fill values in a global variable for loading by the + * test code and a check function which validates that the results are + * as expected. Vector lengths are passed everywhere, a vector length + * of 0 should be treated as do not test.
+ */ +static struct { + setup_fn setup; + check_fn check; +} regset[] = { + { setup_gpr, check_gpr }, + { setup_fpr, check_fpr }, + { setup_z, check_z }, + { setup_p, check_p }, + { setup_ffr, check_ffr }, +}; + +static bool do_test(struct syscall_cfg *cfg, int sve_vl) +{ + int errors = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(regset); i++) + regset[i].setup(cfg, sve_vl); + + do_syscall(sve_vl); + + for (i = 0; i < ARRAY_SIZE(regset); i++) + errors += regset[i].check(cfg, sve_vl); + + return errors == 0; +} + +static void test_one_syscall(struct syscall_cfg *cfg) +{ + int sve_vq, sve_vl; + + /* FPSIMD only case */ + ksft_test_result(do_test(cfg, 0), + "%s FPSIMD\n", cfg->name); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + return; + + for (sve_vq = SVE_VQ_MAX; sve_vq > 0; --sve_vq) { + sve_vl = prctl(PR_SVE_SET_VL, sve_vq * 16); + if (sve_vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + sve_vl &= PR_SVE_VL_LEN_MASK; + + if (sve_vq != sve_vq_from_vl(sve_vl)) + sve_vq = sve_vq_from_vl(sve_vl); + + ksft_test_result(do_test(cfg, sve_vl), + "%s SVE VL %d\n", cfg->name, sve_vl); + } +} + +int sve_count_vls(void) +{ + unsigned int vq; + int vl_count = 0; + int vl; + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + return 0; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (vq != sve_vq_from_vl(vl)) + vq = sve_vq_from_vl(vl); + + vl_count++; + } + + return vl_count; +} + +int main(void) +{ + int i; + + srandom(getpid()); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(syscalls) * (sve_count_vls() + 1)); + + for (i = 0; i < ARRAY_SIZE(syscalls); i++) + test_one_syscall(&syscalls[i]); + + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/.gitignore 
b/tools/testing/selftests/arm64/fp/.gitignore index b67395903b9b..c50d86331ed2 100644 --- a/tools/testing/selftests/arm64/fp/.gitignore +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -1,3 +1,4 @@ +fp-pidbench fpsimd-test rdvl-sve sve-probe-vls diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index ba1488c7c315..95f0b877a060 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -2,13 +2,15 @@ CFLAGS += -I../../../../../usr/include/ TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg -TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \ +TEST_PROGS_EXTENDED := fp-pidbench fpsimd-test fpsimd-stress \ rdvl-sve \ sve-test sve-stress \ vlset all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) +fp-pidbench: fp-pidbench.S asm-utils.o + $(CC) -nostdlib $^ -o $@ fpsimd-test: fpsimd-test.o asm-utils.o $(CC) -nostdlib $^ -o $@ rdvl-sve: rdvl-sve.o rdvl.o diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S new file mode 100644 index 000000000000..16a436389bfc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Trivial syscall overhead benchmark. +// +// This is implemented in asm to ensure that we don't have any issues with +// system libraries using instructions that disrupt the test. 
+ +#include <asm/unistd.h> +#include "assembler.h" + +.arch_extension sve + +.macro test_loop per_loop + mov x10, x20 + mov x8, #__NR_getpid + mrs x11, CNTVCT_EL0 +1: + \per_loop + svc #0 + sub x10, x10, #1 + cbnz x10, 1b + + mrs x12, CNTVCT_EL0 + sub x0, x12, x11 + bl putdec + puts "\n" +.endm + +// Main program entry point +.globl _start +function _start +_start: + puts "Iterations per test: " + mov x20, #10000 + lsl x20, x20, #8 + mov x0, x20 + bl putdec + puts "\n" + + // Test having never used SVE + puts "No SVE: " + test_loop + + // Check for SVE support - should use hwcap but that's hard in asm + mrs x0, ID_AA64PFR0_EL1 + ubfx x0, x0, #32, #4 + cbnz x0, 1f + puts "System does not support SVE\n" + b out +1: + + // Execute a SVE instruction + puts "SVE VL: " + rdvl x0, #8 + bl putdec + puts "\n" + + puts "SVE used once: " + test_loop + + // Use SVE per syscall + puts "SVE used per syscall: " + test_loop "rdvl x0, #8" + + // And we're done +out: + mov x0, #0 + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index c4417bc48d4f..af798b9d232c 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -21,16 +21,37 @@ #include "../../kselftest.h" -#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) -#define FPSIMD_TESTS 5 - -#define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS) +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 #endif +struct vec_type { + const char *name; + unsigned long hwcap_type; + unsigned long hwcap; + int regset; + int prctl_set; +}; + +static const struct vec_type vec_types[] = { + { + .name = "SVE", + .hwcap_type = AT_HWCAP, + .hwcap = HWCAP_SVE, + .regset = NT_ARM_SVE, + .prctl_set = PR_SVE_SET_VL, + }, +}; + +#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) +#define FLAG_TESTS 2 +#define 
FPSIMD_TESTS 3 + +#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types)) + static void fill_buf(char *buf, size_t size) { int i; @@ -59,7 +80,8 @@ static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); } -static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) +static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, + void **buf, size_t *size) { struct user_sve_header *sve; void *p; @@ -80,7 +102,7 @@ static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) iov.iov_base = *buf; iov.iov_len = sz; - if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) + if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov)) goto error; sve = *buf; @@ -96,17 +118,18 @@ error: return NULL; } -static int set_sve(pid_t pid, const struct user_sve_header *sve) +static int set_sve(pid_t pid, const struct vec_type *type, + const struct user_sve_header *sve) { struct iovec iov; iov.iov_base = (void *)sve; iov.iov_len = sve->size; - return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); + return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); } /* Validate setting and getting the inherit flag */ -static void ptrace_set_get_inherit(pid_t child) +static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) { struct user_sve_header sve; struct user_sve_header *new_sve = NULL; @@ -118,9 +141,10 @@ static void ptrace_set_get_inherit(pid_t child) sve.size = sizeof(sve); sve.vl = sve_vl_from_vq(SVE_VQ_MIN); sve.flags = SVE_PT_VL_INHERIT; - ret = set_sve(child, &sve); + ret = set_sve(child, type, &sve); if (ret != 0) { - ksft_test_result_fail("Failed to set SVE_PT_VL_INHERIT\n"); + ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", + type->name); return; } @@ -128,35 +152,39 @@ static void ptrace_set_get_inherit(pid_t child) * Read back the new register state and verify that we have * set the flags we expected. 
*/ - if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { - ksft_test_result_fail("Failed to read SVE flags\n"); + if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read %s SVE flags\n", + type->name); return; } ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT, - "SVE_PT_VL_INHERIT set\n"); + "%s SVE_PT_VL_INHERIT set\n", type->name); /* Now clear */ sve.flags &= ~SVE_PT_VL_INHERIT; - ret = set_sve(child, &sve); + ret = set_sve(child, type, &sve); if (ret != 0) { - ksft_test_result_fail("Failed to clear SVE_PT_VL_INHERIT\n"); + ksft_test_result_fail("Failed to clear %s SVE_PT_VL_INHERIT\n", + type->name); return; } - if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { - ksft_test_result_fail("Failed to read SVE flags\n"); + if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read %s SVE flags\n", + type->name); return; } ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT), - "SVE_PT_VL_INHERIT cleared\n"); + "%s SVE_PT_VL_INHERIT cleared\n", type->name); free(new_sve); } /* Validate attempting to set the specfied VL via ptrace */ -static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) +static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, + unsigned int vl, bool *supported) { struct user_sve_header sve; struct user_sve_header *new_sve = NULL; @@ -166,10 +194,10 @@ static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) *supported = false; /* Check if the VL is supported in this process */ - prctl_vl = prctl(PR_SVE_SET_VL, vl); + prctl_vl = prctl(type->prctl_set, vl); if (prctl_vl == -1) - ksft_exit_fail_msg("prctl(PR_SVE_SET_VL) failed: %s (%d)\n", - strerror(errno), errno); + ksft_exit_fail_msg("prctl(PR_%s_SET_VL) failed: %s (%d)\n", + type->name, strerror(errno), errno); /* If the VL is not supported then a supported VL will be returned */ *supported = (prctl_vl == vl); @@ -178,9 +206,10 @@ 
static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); sve.vl = vl; - ret = set_sve(child, &sve); + ret = set_sve(child, type, &sve); if (ret != 0) { - ksft_test_result_fail("Failed to set VL %u\n", vl); + ksft_test_result_fail("Failed to set %s VL %u\n", + type->name, vl); return; } @@ -188,12 +217,14 @@ static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) * Read back the new register state and verify that we have the * same VL that we got from prctl() on ourselves. */ - if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { - ksft_test_result_fail("Failed to read VL %u\n", vl); + if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read %s VL %u\n", + type->name, vl); return; } - ksft_test_result(new_sve->vl = prctl_vl, "Set VL %u\n", vl); + ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + type->name, vl); free(new_sve); } @@ -209,7 +240,7 @@ static void check_u32(unsigned int vl, const char *reg, } /* Access the FPSIMD registers via the SVE regset */ -static void ptrace_sve_fpsimd(pid_t child) +static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) { void *svebuf = NULL; size_t svebufsz = 0; @@ -219,17 +250,18 @@ static void ptrace_sve_fpsimd(pid_t child) unsigned char *p; /* New process should start with FPSIMD registers only */ - sve = get_sve(child, &svebuf, &svebufsz); + sve = get_sve(child, type, &svebuf, &svebufsz); if (!sve) { - ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + ksft_test_result_fail("get_sve(%s): %s\n", + type->name, strerror(errno)); return; } else { - ksft_test_result_pass("get_sve(FPSIMD)\n"); + ksft_test_result_pass("get_sve(%s FPSIMD)\n", type->name); } ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Set FPSIMD registers\n"); + "Set FPSIMD registers via %s\n", type->name); if ((sve->flags & SVE_PT_REGS_MASK) != 
SVE_PT_REGS_FPSIMD) goto out; @@ -243,9 +275,9 @@ static void ptrace_sve_fpsimd(pid_t child) p[j] = j; } - if (set_sve(child, sve)) { - ksft_test_result_fail("set_sve(FPSIMD): %s\n", - strerror(errno)); + if (set_sve(child, type, sve)) { + ksft_test_result_fail("set_sve(%s FPSIMD): %s\n", + type->name, strerror(errno)); goto out; } @@ -257,16 +289,20 @@ static void ptrace_sve_fpsimd(pid_t child) goto out; } if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) - ksft_test_result_pass("get_fpsimd() gave same state\n"); + ksft_test_result_pass("%s get_fpsimd() gave same state\n", + type->name); else - ksft_test_result_fail("get_fpsimd() gave different state\n"); + ksft_test_result_fail("%s get_fpsimd() gave different state\n", + type->name); out: free(svebuf); } /* Validate attempting to set SVE data and read SVE data */ -static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) +static void ptrace_set_sve_get_sve_data(pid_t child, + const struct vec_type *type, + unsigned int vl) { void *write_buf; void *read_buf = NULL; @@ -281,8 +317,8 @@ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", - data_size, vl); + ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + data_size, type->name, vl); return; } write_sve = write_buf; @@ -306,23 +342,26 @@ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) /* TODO: Generate a valid FFR pattern */ - ret = set_sve(child, write_sve); + ret = set_sve(child, type, write_sve); if (ret != 0) { - ksft_test_result_fail("Failed to set VL %u data\n", vl); + ksft_test_result_fail("Failed to set %s VL %u data\n", + type->name, vl); goto out; } /* Read the data back */ - if (!get_sve(child, (void **)&read_buf, &read_sve_size)) { - ksft_test_result_fail("Failed to read VL %u data\n", 
vl); + if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) { + ksft_test_result_fail("Failed to read %s VL %u data\n", + type->name, vl); goto out; } read_sve = read_buf; /* We might read more data if there's extensions we don't know */ if (read_sve->size < write_sve->size) { - ksft_test_result_fail("Wrote %d bytes, only read %d\n", - write_sve->size, read_sve->size); + ksft_test_result_fail("%s wrote %d bytes, only read %d\n", + type->name, write_sve->size, + read_sve->size); goto out_read; } @@ -349,7 +388,8 @@ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); - ksft_test_result(errors == 0, "Set and get SVE data for VL %u\n", vl); + ksft_test_result(errors == 0, "Set and get %s data for VL %u\n", + type->name, vl); out_read: free(read_buf); @@ -358,7 +398,9 @@ out: } /* Validate attempting to set SVE data and read SVE data */ -static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) +static void ptrace_set_sve_get_fpsimd_data(pid_t child, + const struct vec_type *type, + unsigned int vl) { void *write_buf; struct user_sve_header *write_sve; @@ -376,8 +418,8 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", - data_size, vl); + ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + data_size, type->name, vl); return; } write_sve = write_buf; @@ -395,16 +437,17 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); - ret = set_sve(child, write_sve); + ret = set_sve(child, type, write_sve); if (ret != 0) { - 
ksft_test_result_fail("Failed to set VL %u data\n", vl); + ksft_test_result_fail("Failed to set %s VL %u data\n", + type->name, vl); goto out; } /* Read the data back */ if (get_fpsimd(child, &fpsimd_state)) { - ksft_test_result_fail("Failed to read VL %u FPSIMD data\n", - vl); + ksft_test_result_fail("Failed to read %s VL %u FPSIMD data\n", + type->name, vl); goto out; } @@ -419,7 +462,8 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) sizeof(tmp)); if (tmp != fpsimd_state.vregs[i]) { - printf("# Mismatch in FPSIMD for VL %u Z%d\n", vl, i); + printf("# Mismatch in FPSIMD for %s VL %u Z%d\n", + type->name, vl, i); errors++; } } @@ -429,8 +473,8 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &fpsimd_state.fpcr, &errors); - ksft_test_result(errors == 0, "Set and get FPSIMD data for VL %u\n", - vl); + ksft_test_result(errors == 0, "Set and get FPSIMD data for %s VL %u\n", + type->name, vl); out: free(write_buf); @@ -440,7 +484,7 @@ static int do_parent(pid_t child) { int ret = EXIT_FAILURE; pid_t pid; - int status; + int status, i; siginfo_t si; unsigned int vq, vl; bool vl_supported; @@ -499,26 +543,47 @@ static int do_parent(pid_t child) } } - /* FPSIMD via SVE regset */ - ptrace_sve_fpsimd(child); - - /* prctl() flags */ - ptrace_set_get_inherit(child); - - /* Step through every possible VQ */ - for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { - vl = sve_vl_from_vq(vq); + for (i = 0; i < ARRAY_SIZE(vec_types); i++) { + /* FPSIMD via SVE regset */ + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_sve_fpsimd(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s FPSIMD get via SVE\n", + vec_types[i].name); + ksft_test_result_skip("%s FPSIMD set via SVE\n", + vec_types[i].name); + ksft_test_result_skip("%s set read via FPSIMD\n", + vec_types[i].name); + } - /* First, try to set this vector length */ - 
ptrace_set_get_vl(child, vl, &vl_supported); + /* prctl() flags */ + ptrace_set_get_inherit(child, &vec_types[i]); + + /* Step through every possible VQ */ + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* First, try to set this vector length */ + if (getauxval(vec_types[i].hwcap_type) & + vec_types[i].hwcap) { + ptrace_set_get_vl(child, &vec_types[i], vl, + &vl_supported); + } else { + ksft_test_result_skip("%s get/set VL %d\n", + vec_types[i].name, vl); + vl_supported = false; + } - /* If the VL is supported validate data set/get */ - if (vl_supported) { - ptrace_set_sve_get_sve_data(child, vl); - ptrace_set_sve_get_fpsimd_data(child, vl); - } else { - ksft_test_result_skip("set SVE get SVE for VL %d\n", vl); - ksft_test_result_skip("set SVE get FPSIMD for VL %d\n", vl); + /* If the VL is supported validate data set/get */ + if (vl_supported) { + ptrace_set_sve_get_sve_data(child, &vec_types[i], vl); + ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl); + } else { + ksft_test_result_skip("%s set SVE get SVE for VL %d\n", + vec_types[i].name, vl); + ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n", + vec_types[i].name, vl); + } } } diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 272b888e018e..c90658811a83 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -21,8 +21,6 @@ #include "../../kselftest.h" #include "rdvl.h" -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - #define ARCH_MIN_VL SVE_VL_MIN struct vec_data { diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 22722abc9dfa..2f8c23af3b5e 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -310,14 +310,12 @@ int test_setup(struct tdescr *td) int test_run(struct tdescr 
*td) { - if (td->sig_trig) { - if (td->trigger) - return td->trigger(td); - else - return default_trigger(td); - } else { + if (td->trigger) + return td->trigger(td); + else if (td->sig_trig) + return default_trigger(td); + else return td->run(td, NULL, NULL); - } } void test_result(struct tdescr *td) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 62fafbeb4672..42ffc24e9e71 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,9 +23,8 @@ BPF_GCC ?= $(shell command -v bpf-gcc;) SAN_CFLAGS ?= CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \ - -Dbpf_prog_load=bpf_prog_test_load \ - -Dbpf_load_program=bpf_test_load_program + -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) +LDFLAGS += $(SAN_CFLAGS) LDLIBS += -lcap -lelf -lz -lrt -lpthread # Silence some warnings when compiled with clang @@ -46,10 +45,8 @@ ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc endif -TEST_GEN_FILES = test_lwt_ip_encap.o \ - test_tc_edt.o -TEST_FILES = xsk_prereqs.sh \ - $(wildcard progs/btf_dump_test_case_*.c) +TEST_GEN_FILES = test_lwt_ip_encap.o test_tc_edt.o +TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -108,7 +105,10 @@ endif OVERRIDE_TARGETS := 1 override define CLEAN $(call msg,CLEAN) - $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) + $(Q)$(RM) -r $(TEST_GEN_PROGS) + $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED) + $(Q)$(RM) -r $(TEST_GEN_FILES) + $(Q)$(RM) -r $(EXTRA_CLEAN) $(Q)$(MAKE) -C bpf_testmod clean $(Q)$(MAKE) docs-clean endef @@ -170,7 +170,7 @@ $(OUTPUT)/%:%.c $(OUTPUT)/urandom_read: urandom_read.c $(call msg,BINARY,,$@) - $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id=sha1 + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) 
-Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -178,10 +178,6 @@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_tes $(Q)$(MAKE) $(submake_extras) -C bpf_testmod $(Q)cp bpf_testmod/bpf_testmod.ko $@ -$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) - $(call msg,CC,,$@) - $(Q)$(CC) -c $(CFLAGS) -o $@ $< - DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) @@ -194,24 +190,34 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL) -$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) - -$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c -$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c -$(OUTPUT)/test_sock: cgroup_helpers.c -$(OUTPUT)/test_sock_addr: cgroup_helpers.c -$(OUTPUT)/test_sockmap: cgroup_helpers.c -$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c -$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c -$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c -$(OUTPUT)/test_sock_fields: cgroup_helpers.c -$(OUTPUT)/test_sysctl: cgroup_helpers.c +$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ) + +CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o +TESTING_HELPERS := $(OUTPUT)/testing_helpers.o +TRACE_HELPERS := $(OUTPUT)/trace_helpers.o + +$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) +$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_cgroup_storage: $(CGROUP_HELPERS) $(TESTING_HELPERS) 
+$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) +$(OUTPUT)/test_tag: $(TESTING_HELPERS) +$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) +$(OUTPUT)/xdping: $(TESTING_HELPERS) +$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) +$(OUTPUT)/test_maps: $(TESTING_HELPERS) +$(OUTPUT)/test_verifier: $(TESTING_HELPERS) BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ - CC=$(HOSTCC) LD=$(HOSTLD) \ + ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ @@ -231,18 +237,18 @@ docs-clean: prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - ../../../include/uapi/linux/bpf.h \ + $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ EXTRA_CFLAGS='-g -O0' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) -$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - ../../../include/uapi/linux/bpf.h \ - | $(HOST_BUILD_DIR)/libbpf +$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g -O0' \ + EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers endif @@ -304,12 +310,12 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $3 - CFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v3 + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, 
but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -o $2 -mcpu=v2 + $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE @@ -323,9 +329,10 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ - test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c + test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \ + map_ptr_kern.c core_kern.c # Generate both light skeleton and libbpf skeleton for these -LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c +LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c SKEL_BLACKLIST += $$(LSKELS) test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o @@ -471,13 +478,12 @@ TRUNNER_TESTS_DIR := prog_tests TRUNNER_BPF_PROGS_DIR := progs TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ - btf_helpers.c flow_dissector_load.h + btf_helpers.c flow_dissector_load.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -TRUNNER_BPF_CFLAGS += -DENABLE_ATOMICS_TESTS +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs)) # Define test_progs-no_alu32 test runner. 
@@ -530,16 +536,22 @@ $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/perfbuf_bench.skel.h $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h +$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h +$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm -$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ +$(OUTPUT)/bench: $(OUTPUT)/bench.o \ + $(TESTING_HELPERS) \ + $(TRACE_HELPERS) \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ $(OUTPUT)/bench_ringbufs.o \ - $(OUTPUT)/bench_bloom_filter_map.o + $(OUTPUT)/bench_bloom_filter_map.o \ + $(OUTPUT)/bench_bpf_loop.o \ + $(OUTPUT)/bench_strncmp.o $(call msg,BINARY,,$@) - $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 5e287e445f75..42ef250c7acc 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -204,16 +204,17 @@ __ https://reviews.llvm.org/D93563 btf_tag test and Clang version ============================== -The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute. -It was introduced in `Clang 14`__. +The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and +btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_]. -Without it, the btf_tag selftest will be skipped and you will observe: +Without them, the btf_tag selftest will be skipped and you will observe: .. code-block:: console #<test_num> btf_tag:SKIP -__ https://reviews.llvm.org/D111588 +.. 
_0: https://reviews.llvm.org/D111588 +.. _1: https://reviews.llvm.org/D111199 Clang dependencies for static linking tests =========================================== diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index cc4722f693e9..f973320e6dbf 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -29,26 +29,10 @@ static int libbpf_print_fn(enum libbpf_print_level level, return vfprintf(stderr, format, args); } -static int bump_memlock_rlimit(void) +void setup_libbpf(void) { - struct rlimit rlim_new = { - .rlim_cur = RLIM_INFINITY, - .rlim_max = RLIM_INFINITY, - }; - - return setrlimit(RLIMIT_MEMLOCK, &rlim_new); -} - -void setup_libbpf() -{ - int err; - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); - - err = bump_memlock_rlimit(); - if (err) - fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err); } void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns) @@ -134,6 +118,39 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt) total_ops_mean, total_ops_stddev); } +void ops_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double hits_per_sec, hits_per_prod; + + hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0); + hits_per_prod = hits_per_sec / env.producer_cnt; + + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod); +} + +void ops_report_final(struct bench_res res[], int res_cnt) +{ + double hits_mean = 0.0, hits_stddev = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + + hits_stddev = sqrt(hits_stddev); + } + printf("Summary: throughput %8.3lf \u00B1 
%5.3lf M ops/s (%7.3lfM ops/prod), ", + hits_mean, hits_stddev, hits_mean / env.producer_cnt); + printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); +} + const char *argp_program_version = "benchmark"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; const char argp_program_doc[] = @@ -171,10 +188,14 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; +extern struct argp bench_bpf_loop_argp; +extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, + { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, + { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, {}, }; @@ -359,6 +380,11 @@ extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; +extern const struct bench bench_trig_uprobe_base; +extern const struct bench bench_trig_uprobe_with_nop; +extern const struct bench bench_trig_uretprobe_with_nop; +extern const struct bench bench_trig_uprobe_without_nop; +extern const struct bench bench_trig_uretprobe_without_nop; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; @@ -368,6 +394,9 @@ extern const struct bench bench_bloom_update; extern const struct bench bench_bloom_false_positive; extern const struct bench bench_hashmap_without_bloom; extern const struct bench bench_hashmap_with_bloom; +extern const struct bench bench_bpf_loop; +extern const struct bench bench_strncmp_no_helper; +extern const struct bench bench_strncmp_helper; static const struct bench *benchs[] = { &bench_count_global, @@ -385,6 +414,11 @@ static const struct bench *benchs[] = { &bench_trig_fentry, 
&bench_trig_fentry_sleep, &bench_trig_fmodret, + &bench_trig_uprobe_base, + &bench_trig_uprobe_with_nop, + &bench_trig_uretprobe_with_nop, + &bench_trig_uprobe_without_nop, + &bench_trig_uretprobe_without_nop, &bench_rb_libbpf, &bench_rb_custom, &bench_pb_libbpf, @@ -394,6 +428,9 @@ static const struct bench *benchs[] = { &bench_bloom_false_positive, &bench_hashmap_without_bloom, &bench_hashmap_with_bloom, + &bench_bpf_loop, + &bench_strncmp_no_helper, + &bench_strncmp_helper, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 624c6b11501f..fb3e213df3dc 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -38,8 +38,8 @@ struct bench_res { struct bench { const char *name; - void (*validate)(); - void (*setup)(); + void (*validate)(void); + void (*setup)(void); void *(*producer_thread)(void *ctx); void *(*consumer_thread)(void *ctx); void (*measure)(struct bench_res* res); @@ -54,13 +54,16 @@ struct counter { extern struct env env; extern const struct bench *bench; -void setup_libbpf(); +void setup_libbpf(void); void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns); void hits_drops_report_final(struct bench_res res[], int res_cnt); void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); void false_hits_report_final(struct bench_res res[], int res_cnt); +void ops_report_progress(int iter, struct bench_res *res, long delta_ns); +void ops_report_final(struct bench_res res[], int res_cnt); -static inline __u64 get_time_ns() { +static inline __u64 get_time_ns(void) +{ struct timespec t; clock_gettime(CLOCK_MONOTONIC, &t); diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c index 6eeeed2913e6..5bcb8a8cdeb2 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +++ 
b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -63,29 +63,34 @@ static const struct argp_option opts[] = { static error_t parse_arg(int key, char *arg, struct argp_state *state) { + long ret; + switch (key) { case ARG_NR_ENTRIES: - args.nr_entries = strtol(arg, NULL, 10); - if (args.nr_entries == 0) { + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { fprintf(stderr, "Invalid nr_entries count."); argp_usage(state); } + args.nr_entries = ret; break; case ARG_NR_HASH_FUNCS: - args.nr_hash_funcs = strtol(arg, NULL, 10); - if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) { + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > 15) { fprintf(stderr, "The bloom filter must use 1 to 15 hash functions."); argp_usage(state); } + args.nr_hash_funcs = ret; break; case ARG_VALUE_SIZE: - args.value_size = strtol(arg, NULL, 10); - if (args.value_size < 2 || args.value_size > 256) { + ret = strtol(arg, NULL, 10); + if (ret < 2 || ret > 256) { fprintf(stderr, "Invalid value size. 
Must be between 2 and 256 bytes"); argp_usage(state); } + args.value_size = ret; break; default: return ARGP_ERR_UNKNOWN; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c new file mode 100644 index 000000000000..d0a6572bfab6 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include "bench.h" +#include "bpf_loop_bench.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_loop_bench *skel; +} ctx; + +static struct { + __u32 nr_loops; +} args = { + .nr_loops = 10, +}; + +enum { + ARG_NR_LOOPS = 4000, +}; + +static const struct argp_option opts[] = { + { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0, + "Set number of loops for the bpf_loop helper"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_LOOPS: + args.nr_loops = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bpf_loop_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) + /* trigger the bpf program */ + syscall(__NR_getpgid); + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup(void) +{ + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = bpf_loop_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach 
program!\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = args.nr_loops; +} + +const struct bench bench_bpf_loop = { + .name = "bpf-loop", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c index befba7a82643..078972ce208e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_count.c +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -36,7 +36,7 @@ static struct count_local_ctx { struct counter *hits; } count_local_ctx; -static void count_local_setup() +static void count_local_setup(void) { struct count_local_ctx *ctx = &count_local_ctx; diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index c7ec114eca56..3c203b6d6a6e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -11,7 +11,7 @@ static struct ctx { int fd; } ctx; -static void validate() +static void validate(void) { if (env.producer_cnt != 1) { fprintf(stderr, "benchmark doesn't support multi-producer!\n"); @@ -43,7 +43,7 @@ static void measure(struct bench_res *res) res->hits = atomic_swap(&ctx.hits.value, 0); } -static void setup_ctx() +static void setup_ctx(void) { setup_libbpf(); @@ -71,36 +71,36 @@ static void attach_bpf(struct bpf_program *prog) } } -static void setup_base() +static void setup_base(void) { setup_ctx(); } -static void setup_kprobe() +static void setup_kprobe(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog1); } -static void setup_kretprobe() +static void setup_kretprobe(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog2); } -static void setup_rawtp() +static void setup_rawtp(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog3); } -static void setup_fentry() +static void 
setup_fentry(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog4); } -static void setup_fexit() +static void setup_fexit(void) { setup_ctx(); attach_bpf(ctx.skel->progs.prog5); diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index d167bffac679..da8593b3494a 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -88,12 +88,12 @@ const struct argp bench_ringbufs_argp = { static struct counter buf_hits; -static inline void bufs_trigger_batch() +static inline void bufs_trigger_batch(void) { (void)syscall(__NR_getpgid); } -static void bufs_validate() +static void bufs_validate(void) { if (env.consumer_cnt != 1) { fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); @@ -132,7 +132,7 @@ static void ringbuf_libbpf_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static struct ringbuf_bench *ringbuf_setup_skeleton() +static struct ringbuf_bench *ringbuf_setup_skeleton(void) { struct ringbuf_bench *skel; @@ -167,7 +167,7 @@ static int buf_process_sample(void *ctx, void *data, size_t len) return 0; } -static void ringbuf_libbpf_setup() +static void ringbuf_libbpf_setup(void) { struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; struct bpf_link *link; @@ -223,7 +223,7 @@ static void ringbuf_custom_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static void ringbuf_custom_setup() +static void ringbuf_custom_setup(void) { struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; const size_t page_size = getpagesize(); @@ -352,7 +352,7 @@ static void perfbuf_measure(struct bench_res *res) res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); } -static struct perfbuf_bench *perfbuf_setup_skeleton() +static struct perfbuf_bench *perfbuf_setup_skeleton(void) { struct perfbuf_bench *skel; @@ -390,15 +390,10 @@ perfbuf_process_sample_raw(void 
*input_ctx, int cpu, return LIBBPF_PERF_EVENT_CONT; } -static void perfbuf_libbpf_setup() +static void perfbuf_libbpf_setup(void) { struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; struct perf_event_attr attr; - struct perf_buffer_raw_opts pb_opts = { - .event_cb = perfbuf_process_sample_raw, - .ctx = (void *)(long)0, - .attr = &attr, - }; struct bpf_link *link; ctx->skel = perfbuf_setup_skeleton(); @@ -423,7 +418,8 @@ static void perfbuf_libbpf_setup() } ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf), - args.perfbuf_sz, &pb_opts); + args.perfbuf_sz, &attr, + perfbuf_process_sample_raw, NULL, NULL); if (!ctx->perfbuf) { fprintf(stderr, "failed to create perfbuf\n"); exit(1); diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c new file mode 100644 index 000000000000..494b591c0289 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. 
Huawei Technologies Co., Ltd */ +#include <argp.h> +#include "bench.h" +#include "strncmp_bench.skel.h" + +static struct strncmp_ctx { + struct strncmp_bench *skel; +} ctx; + +static struct strncmp_args { + u32 cmp_str_len; +} args = { + .cmp_str_len = 32, +}; + +enum { + ARG_CMP_STR_LEN = 5000, +}; + +static const struct argp_option opts[] = { + { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0, + "Set the length of compared string" }, + {}, +}; + +static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_CMP_STR_LEN: + args.cmp_str_len = strtoul(arg, NULL, 10); + if (!args.cmp_str_len || + args.cmp_str_len >= sizeof(ctx.skel->bss->str)) { + fprintf(stderr, "Invalid cmp str len (limit %zu)\n", + sizeof(ctx.skel->bss->str)); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_strncmp_argp = { + .options = opts, + .parser = strncmp_parse_arg, +}; + +static void strncmp_validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void strncmp_setup(void) +{ + int err; + char *target; + size_t i, sz; + + sz = sizeof(ctx.skel->rodata->target); + if (!sz || sz < sizeof(ctx.skel->bss->str)) { + fprintf(stderr, "invalid string size (target %zu, src %zu)\n", + sz, sizeof(ctx.skel->bss->str)); + exit(1); + } + + setup_libbpf(); + + ctx.skel = strncmp_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + srandom(time(NULL)); + target = ctx.skel->rodata->target; + for (i = 0; i < sz - 1; i++) + target[i] = '1' + random() % 9; + target[sz - 1] = '\0'; + + ctx.skel->rodata->cmp_str_len = args.cmp_str_len; + + memcpy(ctx.skel->bss->str, target, args.cmp_str_len); + ctx.skel->bss->str[args.cmp_str_len] = '\0'; + /* Make bss->str < rodata->target */ + ctx.skel->bss->str[args.cmp_str_len - 1] -= 1; + + err = 
strncmp_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + strncmp_bench__destroy(ctx.skel); + exit(1); + } +} + +static void strncmp_attach_prog(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void strncmp_no_helper_setup(void) +{ + strncmp_setup(); + strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper); +} + +static void strncmp_helper_setup(void) +{ + strncmp_setup(); + strncmp_attach_prog(ctx.skel->progs.strncmp_helper); +} + +static void *strncmp_producer(void *ctx) +{ + while (true) + (void)syscall(__NR_getpgid); + return NULL; +} + +static void *strncmp_consumer(void *ctx) +{ + return NULL; +} + +static void strncmp_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +const struct bench bench_strncmp_no_helper = { + .name = "strncmp-no-helper", + .validate = strncmp_validate, + .setup = strncmp_no_helper_setup, + .producer_thread = strncmp_producer, + .consumer_thread = strncmp_consumer, + .measure = strncmp_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_strncmp_helper = { + .name = "strncmp-helper", + .validate = strncmp_validate, + .setup = strncmp_helper_setup, + .producer_thread = strncmp_producer, + .consumer_thread = strncmp_consumer, + .measure = strncmp_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index f41a491a8cc0..7f957c55a3ca 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -2,6 +2,7 @@ /* Copyright (c) 2020 Facebook */ #include "bench.h" #include "trigger_bench.skel.h" +#include "trace_helpers.h" /* BPF 
triggering benchmarks */ static struct trigger_ctx { @@ -10,7 +11,7 @@ static struct trigger_ctx { static struct counter base_hits; -static void trigger_validate() +static void trigger_validate(void) { if (env.consumer_cnt != 1) { fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); @@ -44,7 +45,7 @@ static void trigger_measure(struct bench_res *res) res->hits = atomic_swap(&ctx.skel->bss->hits, 0); } -static void setup_ctx() +static void setup_ctx(void) { setup_libbpf(); @@ -66,37 +67,37 @@ static void attach_bpf(struct bpf_program *prog) } } -static void trigger_tp_setup() +static void trigger_tp_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_tp); } -static void trigger_rawtp_setup() +static void trigger_rawtp_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); } -static void trigger_kprobe_setup() +static void trigger_kprobe_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe); } -static void trigger_fentry_setup() +static void trigger_fentry_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry); } -static void trigger_fentry_sleep_setup() +static void trigger_fentry_sleep_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); } -static void trigger_fmodret_setup() +static void trigger_fmodret_setup(void) { setup_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fmodret); @@ -107,6 +108,101 @@ static void *trigger_consumer(void *input) return NULL; } +/* make sure call is not inlined and not avoided by compiler, so __weak and + * inline asm volatile in the body of the function + * + * There is a performance difference between uprobing at nop location vs other + * instructions. So use two different targets, one of which starts with nop + * and another doesn't. + * + * GCC doesn't generate stack setup preamble for these functions due to them + * having no input arguments and doing nothing in the body. 
+ */ +__weak void uprobe_target_with_nop(void) +{ + asm volatile ("nop"); +} + +__weak void uprobe_target_without_nop(void) +{ + asm volatile (""); +} + +static void *uprobe_base_producer(void *input) +{ + while (true) { + uprobe_target_with_nop(); + atomic_inc(&base_hits.value); + } + return NULL; +} + +static void *uprobe_producer_with_nop(void *input) +{ + while (true) + uprobe_target_with_nop(); + return NULL; +} + +static void *uprobe_producer_without_nop(void *input) +{ + while (true) + uprobe_target_without_nop(); + return NULL; +} + +static void usetup(bool use_retprobe, bool use_nop) +{ + size_t uprobe_offset; + ssize_t base_addr; + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = trigger_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + base_addr = get_base_addr(); + if (use_nop) + uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr); + else + uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr); + + link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, + use_retprobe, + -1 /* all PIDs */, + "/proc/self/exe", + uprobe_offset); + if (!link) { + fprintf(stderr, "failed to attach uprobe!\n"); + exit(1); + } + ctx.skel->links.bench_trigger_uprobe = link; +} + +static void uprobe_setup_with_nop(void) +{ + usetup(false, true); +} + +static void uretprobe_setup_with_nop(void) +{ + usetup(true, true); +} + +static void uprobe_setup_without_nop(void) +{ + usetup(false, false); +} + +static void uretprobe_setup_without_nop(void) +{ + usetup(true, false); +} + const struct bench bench_trig_base = { .name = "trig-base", .validate = trigger_validate, @@ -182,3 +278,53 @@ const struct bench bench_trig_fmodret = { .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; + +const struct bench bench_trig_uprobe_base = { + .name = "trig-uprobe-base", + .setup = NULL, /* no uprobe/uretprobe is attached */ + 
.producer_thread = uprobe_base_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_base_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uprobe_with_nop = { + .name = "trig-uprobe-with-nop", + .setup = uprobe_setup_with_nop, + .producer_thread = uprobe_producer_with_nop, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uretprobe_with_nop = { + .name = "trig-uretprobe-with-nop", + .setup = uretprobe_setup_with_nop, + .producer_thread = uprobe_producer_with_nop, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uprobe_without_nop = { + .name = "trig-uprobe-without-nop", + .setup = uprobe_setup_without_nop, + .producer_thread = uprobe_producer_without_nop, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uretprobe_without_nop = { + .name = "trig-uretprobe-without-nop", + .setup = uretprobe_setup_without_nop, + .producer_thread = uprobe_producer_without_nop, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh new file mode 100755 index 000000000000..d4f5f73b356b --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for t in 1 4 8 12 16; do +for 
i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do +subtitle "nr_loops: $i, nr_threads: $t" + summarize_ops "bpf_loop: " \ + "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)" + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh new file mode 100755 index 000000000000..142697284b45 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for s in 1 8 64 512 2048 4095; do + for b in no-helper helper; do + summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})" + done +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 9a16be78b180..6c5e6023a69f 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -33,6 +33,14 @@ function percentage() echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" } +function ops() +{ + echo -n "throughput: " + echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", latency: " + echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" +} + function total() { echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" @@ -52,6 +60,13 @@ function summarize_percentage() printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" } +function summarize_ops() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(ops $summary)" +} + function summarize_total() { bench="$1" diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index b5b6b013a245..b5941d514e17 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -25,11 +25,12 @@ static const 
char * const btf_kind_str_mapping[] = { [BTF_KIND_DATASEC] = "DATASEC", [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", + [BTF_KIND_TYPE_TAG] = "TYPE_TAG", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_DECL_TAG) + if (kind > BTF_KIND_TYPE_TAG) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -109,6 +110,7 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_TYPEDEF: + case BTF_KIND_TYPE_TAG: fprintf(out, " type_id=%u", t->type); break; case BTF_KIND_ARRAY: { @@ -238,7 +240,6 @@ const char *btf_type_c_dump(const struct btf *btf) static char buf[16 * 1024]; FILE *buf_file; struct btf_dump *d = NULL; - struct btf_dump_opts opts = {}; int err, i; buf_file = fmemopen(buf, sizeof(buf) - 1, "w"); @@ -247,22 +248,26 @@ const char *btf_type_c_dump(const struct btf *btf) return NULL; } - opts.ctx = buf_file; - d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + d = btf_dump__new(btf, btf_dump_printf, buf_file, NULL); if (libbpf_get_error(d)) { fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d)); - return NULL; + goto err_out; } for (i = 1; i < btf__type_cnt(btf); i++) { err = btf_dump__dump_type(d, i); if (err) { fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err); - return NULL; + goto err_out; } } + btf_dump__free(d); fflush(buf_file); fclose(buf_file); return buf; +err_out: + btf_dump__free(d); + fclose(buf_file); + return NULL; } diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 5192305159ec..f6287132fa89 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -38,7 +38,9 @@ CONFIG_IPV6_SIT=m CONFIG_BPF_JIT=y CONFIG_BPF_LSM=y CONFIG_SECURITY=y +CONFIG_RC_CORE=y CONFIG_LIRC=y +CONFIG_BPF_LIRC_MODE2=y CONFIG_IMA=y CONFIG_SECURITYFS=y CONFIG_IMA_WRITE_POLICY=y diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h 
b/tools/testing/selftests/bpf/flow_dissector_load.h index 9d0acc2fc6cc..f40b585f4e7e 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -4,6 +4,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "testing_helpers.h" static inline int bpf_flow_load(struct bpf_object **obj, const char *path, @@ -18,7 +19,7 @@ static inline int bpf_flow_load(struct bpf_object **obj, int prog_array_fd; int ret, fd, i; - ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, + ret = bpf_prog_test_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj, prog_fd); if (ret) return ret; diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c index 99628e1a1e58..3a7b82bd9e94 100644 --- a/tools/testing/selftests/bpf/get_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c @@ -19,6 +19,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "testing_helpers.h" #include "bpf_rlimit.h" #define CHECK(condition, tag, format...) 
({ \ @@ -66,8 +67,8 @@ int main(int argc, char **argv) if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno)) return 1; - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) goto cleanup_cgroup_env; cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids"); diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c index f4d870da7684..78c76496b14a 100644 --- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -68,13 +68,6 @@ static void map_batch_verify(int *visited, __u32 max_entries, int *keys, static void __test_map_lookup_and_update_batch(bool is_pcpu) { - struct bpf_create_map_attr xattr = { - .name = "array_map", - .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : - BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(__s64), - }; int map_fd, *keys, *visited; __u32 count, total, total_success; const __u32 max_entries = 10; @@ -86,10 +79,10 @@ static void __test_map_lookup_and_update_batch(bool is_pcpu) .flags = 0, ); - xattr.max_entries = max_entries; - map_fd = bpf_create_map_xattr(&xattr); + map_fd = bpf_map_create(is_pcpu ? 
BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY, + "array_map", sizeof(int), sizeof(__s64), max_entries, NULL); CHECK(map_fd == -1, - "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + "bpf_map_create()", "error:%s\n", strerror(errno)); value_size = sizeof(__s64); if (is_pcpu) diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c index 976bf415fbdd..f807d53fd8dd 100644 --- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c @@ -83,22 +83,15 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu) int err, step, value_size; bool nospace_err; void *values; - struct bpf_create_map_attr xattr = { - .name = "hash_map", - .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : - BPF_MAP_TYPE_HASH, - .key_size = sizeof(int), - .value_size = sizeof(int), - }; DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, .elem_flags = 0, .flags = 0, ); - xattr.max_entries = max_entries; - map_fd = bpf_create_map_xattr(&xattr); + map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH, + "hash_map", sizeof(int), sizeof(int), max_entries, NULL); CHECK(map_fd == -1, - "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + "bpf_map_create()", "error:%s\n", strerror(errno)); value_size = is_pcpu ? 
sizeof(value) : sizeof(int); keys = malloc(max_entries * sizeof(int)); diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c index 2e986e5e4cac..87d07b596e17 100644 --- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c @@ -64,13 +64,7 @@ static void map_batch_verify(int *visited, __u32 max_entries, void test_lpm_trie_map_batch_ops(void) { - struct bpf_create_map_attr xattr = { - .name = "lpm_trie_map", - .map_type = BPF_MAP_TYPE_LPM_TRIE, - .key_size = sizeof(struct test_lpm_key), - .value_size = sizeof(int), - .map_flags = BPF_F_NO_PREALLOC, - }; + LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC); struct test_lpm_key *keys, key; int map_fd, *values, *visited; __u32 step, count, total, total_success; @@ -82,9 +76,10 @@ void test_lpm_trie_map_batch_ops(void) .flags = 0, ); - xattr.max_entries = max_entries; - map_fd = bpf_create_map_xattr(&xattr); - CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n", + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", + sizeof(struct test_lpm_key), sizeof(int), + max_entries, &create_opts); + CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", strerror(errno)); keys = malloc(max_entries * sizeof(struct test_lpm_key)); diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c index e569edc679d8..099eb4dfd4f7 100644 --- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c @@ -19,16 +19,12 @@ #include <test_btf.h> #include <test_maps.h> -static struct bpf_create_map_attr xattr = { - .name = "sk_storage_map", - .map_type = BPF_MAP_TYPE_SK_STORAGE, - .map_flags = BPF_F_NO_PREALLOC, - .max_entries = 0, - .key_size = 4, - .value_size = 8, +static struct bpf_map_create_opts map_opts = { + .sz 
= sizeof(map_opts), .btf_key_type_id = 1, .btf_value_type_id = 3, .btf_fd = -1, + .map_flags = BPF_F_NO_PREALLOC, }; static unsigned int nr_sk_threads_done; @@ -140,7 +136,7 @@ static int load_btf(void) memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types), btf_str_sec, sizeof(btf_str_sec)); - return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0); + return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL); } static int create_sk_storage_map(void) @@ -150,13 +146,13 @@ static int create_sk_storage_map(void) btf_fd = load_btf(); CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", btf_fd, errno); - xattr.btf_fd = btf_fd; + map_opts.btf_fd = btf_fd; - map_fd = bpf_create_map_xattr(&xattr); - xattr.btf_fd = -1; + map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); + map_opts.btf_fd = -1; close(btf_fd); CHECK(map_fd == -1, - "bpf_create_map_xattr()", "errno:%d\n", errno); + "bpf_map_create()", "errno:%d\n", errno); return map_fd; } @@ -463,20 +459,20 @@ static void test_sk_storage_map_basic(void) int cnt; int lock; } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value; - struct bpf_create_map_attr bad_xattr; + struct bpf_map_create_opts bad_xattr; int btf_fd, map_fd, sk_fd, err; btf_fd = load_btf(); CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n", btf_fd, errno); - xattr.btf_fd = btf_fd; + map_opts.btf_fd = btf_fd; sk_fd = socket(AF_INET6, SOCK_STREAM, 0); CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n", sk_fd, errno); - map_fd = bpf_create_map_xattr(&xattr); - CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)", + map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts); + CHECK(map_fd == -1, "bpf_map_create(good_xattr)", "map_fd:%d errno:%d\n", map_fd, errno); /* Add new elem */ @@ -560,31 +556,29 @@ static void test_sk_storage_map_basic(void) CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); + 
memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); bad_xattr.btf_key_type_id = 0; - err = bpf_create_map_xattr(&bad_xattr); - CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); + CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); + memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); bad_xattr.btf_key_type_id = 3; - err = bpf_create_map_xattr(&bad_xattr); - CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); + CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); - bad_xattr.max_entries = 1; - err = bpf_create_map_xattr(&bad_xattr); - CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts); + CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)", "err:%d errno:%d\n", err, errno); - memcpy(&bad_xattr, &xattr, sizeof(xattr)); + memcpy(&bad_xattr, &map_opts, sizeof(map_opts)); bad_xattr.map_flags = 0; - err = bpf_create_map_xattr(&bad_xattr); + err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr); CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)", "err:%d errno:%d\n", err, errno); - xattr.btf_fd = -1; + map_opts.btf_fd = -1; close(btf_fd); close(map_fd); close(sk_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 5861446d0777..0ee29e11eaee 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -39,13 +39,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {1, 
"R1=ctx(id=0,off=0,imm=0)"}, - {1, "R10=fp0"}, - {1, "R3_w=inv2"}, - {2, "R3_w=inv4"}, - {3, "R3_w=inv8"}, - {4, "R3_w=inv16"}, - {5, "R3_w=inv32"}, + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, + {0, "R3_w=inv2"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv16"}, + {4, "R3_w=inv32"}, }, }, { @@ -67,19 +67,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {1, "R1=ctx(id=0,off=0,imm=0)"}, - {1, "R10=fp0"}, - {1, "R3_w=inv1"}, - {2, "R3_w=inv2"}, - {3, "R3_w=inv4"}, - {4, "R3_w=inv8"}, - {5, "R3_w=inv16"}, - {6, "R3_w=inv1"}, - {7, "R4_w=inv32"}, - {8, "R4_w=inv16"}, - {9, "R4_w=inv8"}, - {10, "R4_w=inv4"}, - {11, "R4_w=inv2"}, + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, + {0, "R3_w=inv1"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv1"}, + {6, "R4_w=inv32"}, + {7, "R4_w=inv16"}, + {8, "R4_w=inv8"}, + {9, "R4_w=inv4"}, + {10, "R4_w=inv2"}, }, }, { @@ -96,14 +96,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {1, "R1=ctx(id=0,off=0,imm=0)"}, - {1, "R10=fp0"}, - {1, "R3_w=inv4"}, - {2, "R3_w=inv8"}, - {3, "R3_w=inv10"}, - {4, "R4_w=inv8"}, - {5, "R4_w=inv12"}, - {6, "R4_w=inv14"}, + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, + {0, "R3_w=inv4"}, + {1, "R3_w=inv8"}, + {2, "R3_w=inv10"}, + {3, "R4_w=inv8"}, + {4, "R4_w=inv12"}, + {5, "R4_w=inv14"}, }, }, { @@ -118,12 +118,12 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {1, "R1=ctx(id=0,off=0,imm=0)"}, - {1, "R10=fp0"}, + {0, "R1=ctx(id=0,off=0,imm=0)"}, + {0, "R10=fp0"}, + {0, "R3_w=inv7"}, {1, "R3_w=inv7"}, - {2, "R3_w=inv7"}, - {3, "R3_w=inv14"}, - {4, "R3_w=inv56"}, + {2, "R3_w=inv14"}, + {3, "R3_w=inv56"}, }, }, @@ -161,19 +161,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, - {7, 
"R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {6, "R0_w=pkt(id=0,off=8,r=8,imm=0)"}, + {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {7, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {8, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {10, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {12, "R3_w=pkt_end(id=0,off=0,imm=0)"}, + {17, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {18, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {19, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {20, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {21, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {22, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -194,16 +194,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {13, 
"R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {6, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {7, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {11, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {12, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {13, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {15, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -234,14 +234,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, - {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, + {2, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {5, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, + {9, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -296,8 +296,8 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. 
*/ - {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ @@ -313,11 +313,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {17, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -329,18 +329,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w=pkt(id=0,off=14,r=8"}, + {25, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {26, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. 
*/ - {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, + {28, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -386,13 +386,13 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {7, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {8, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, - {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {12, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so @@ -403,12 +403,12 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {17, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). 
*/ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, - {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, + {20, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so @@ -448,18 +448,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5_w=pkt_end(id=0,off=0,imm=0)"}, + {3, "R5_w=pkt_end(id=0,off=0,imm=0)"}, /* (ptr - ptr) << 2 == unknown, (4n) */ - {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, + {5, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ - {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {6, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, - {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {12, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. 
* We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -502,14 +502,14 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ - {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, + {11, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>= 0 */ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, @@ -556,14 +556,14 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. 
*/ - {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {6, "R2_w=pkt(id=0,off=0,r=8,imm=0)"}, + {9, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {14, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, @@ -594,6 +594,12 @@ static int do_test_single(struct bpf_align_test *test) struct bpf_insn *prog = test->insns; int prog_type = test->prog_type; char bpf_vlog_copy[32768]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .prog_flags = BPF_F_STRICT_ALIGNMENT, + .log_buf = bpf_vlog, + .log_size = sizeof(bpf_vlog), + .log_level = 2, + ); const char *line_ptr; int cur_line = -1; int prog_len, i; @@ -601,9 +607,8 @@ static int do_test_single(struct bpf_align_test *test) int ret; prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, BPF_F_STRICT_ALIGNMENT, - "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); + fd_prog = bpf_prog_load(prog_type ? 
: BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", + prog, prog_len, &opts); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); @@ -620,12 +625,15 @@ static int do_test_single(struct bpf_align_test *test) line_ptr = strtok(bpf_vlog_copy, "\n"); for (i = 0; i < MAX_MATCHES; i++) { struct bpf_reg_match m = test->matches[i]; + int tmp; if (!m.match) break; while (line_ptr) { cur_line = -1; sscanf(line_ptr, "%u: ", &cur_line); + if (cur_line == -1) + sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line); if (cur_line == m.line) break; line_ptr = strtok(NULL, "\n"); @@ -637,7 +645,19 @@ static int do_test_single(struct bpf_align_test *test) printf("%s", bpf_vlog); break; } + /* Check the next line as well in case the previous line + * did not have a corresponding bpf insn. Example: + * func#0 @0 + * 0: R1=ctx(id=0,off=0,imm=0) R10=fp0 + * 0: (b7) r3 = 2 ; R3_w=inv2 + */ if (!strstr(line_ptr, m.match)) { + cur_line = -1; + line_ptr = strtok(NULL, "\n"); + sscanf(line_ptr, "%u: ", &cur_line); + } + if (cur_line != m.line || !line_ptr || + !strstr(line_ptr, m.match)) { printf("Failed to find match %u: %s\n", m.line, m.match); ret = 1; diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 0f9525293881..86b7d5d84eec 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -167,7 +167,7 @@ static void test_cmpxchg(struct atomics_lskel *skel) prog_fd = skel->progs.cmpxchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); - if (CHECK(err || retval, "test_run add", + if (CHECK(err || retval, "test_run cmpxchg", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) goto cleanup; @@ -196,7 +196,7 @@ static void test_xchg(struct atomics_lskel *skel) prog_fd = skel->progs.xchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, 
&duration); - if (CHECK(err || retval, "test_run add", + if (CHECK(err || retval, "test_run xchg", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index be73e3de6668..d2d9e965eba5 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -7,32 +7,33 @@ static void test_fail_cases(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts); __u32 value; int fd, err; /* Invalid key size */ - fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0); - if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size")) + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 4, sizeof(value), 100, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid key size")) close(fd); /* Invalid value size */ - fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0); - if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0")) + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, 0, 100, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0")) close(fd); /* Invalid max entries size */ - fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 0, 0); - if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size")) + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL); + if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size")) close(fd); /* Bloom filter maps do not support BPF_F_NO_PREALLOC */ - fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, - BPF_F_NO_PREALLOC); - if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags")) + opts.map_flags = BPF_F_NO_PREALLOC; + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); + if 
(!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid flags")) close(fd); - fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0); - if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter")) + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, NULL); + if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter")) return; /* Test invalid flags */ @@ -56,13 +57,14 @@ static void test_fail_cases(void) static void test_success_cases(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts); char value[11]; int fd, err; /* Create a map */ - fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, - BPF_F_ZERO_SEED | BPF_F_NUMA_NODE); - if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case")) + opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE; + fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts); + if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case")) return; /* Add a value to the bloom filter */ @@ -100,9 +102,9 @@ static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals struct bpf_link *link; /* Create a bloom filter map that will be used as the inner map */ - inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals), - nr_rand_vals, 0); - if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map")) + inner_map_fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(*rand_vals), + nr_rand_vals, NULL); + if (!ASSERT_GE(inner_map_fd, 0, "bpf_map_create bloom filter inner map")) return; for (i = 0; i < nr_rand_vals; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 9454331aaf85..b84f859b1267 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -469,12 +469,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * fills seq_file buffer and then the other will 
trigger * overflow and needs restart. */ - map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); - if (CHECK(map1_fd < 0, "bpf_create_map", + map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); + if (CHECK(map1_fd < 0, "bpf_map_create", "map_creation failed: %s\n", strerror(errno))) goto out; - map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); - if (CHECK(map2_fd < 0, "bpf_create_map", + map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); + if (CHECK(map2_fd < 0, "bpf_map_create", "map_creation failed: %s\n", strerror(errno))) goto free_map1; @@ -699,14 +699,13 @@ static void test_bpf_percpu_hash_map(void) char buf[64]; void *val; - val = malloc(8 * bpf_num_possible_cpus()); - skel = bpf_iter_bpf_percpu_hash_map__open(); if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", "skeleton open failed\n")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); + val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_hash_map__load(skel); if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", @@ -770,6 +769,7 @@ free_link: bpf_link__destroy(link); out: bpf_iter_bpf_percpu_hash_map__destroy(skel); + free(val); } static void test_bpf_array_map(void) @@ -870,14 +870,13 @@ static void test_bpf_percpu_array_map(void) void *val; int len; - val = malloc(8 * bpf_num_possible_cpus()); - skel = bpf_iter_bpf_percpu_array_map__open(); if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", "skeleton open failed\n")) return; skel->rodata->num_cpus = bpf_num_possible_cpus(); + val = malloc(8 * bpf_num_possible_cpus()); err = bpf_iter_bpf_percpu_array_map__load(skel); if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", @@ -933,6 +932,7 @@ free_link: bpf_link__destroy(link); out: bpf_iter_bpf_percpu_array_map__destroy(skel); + free(val); } /* An iterator program deletes all local storage in a map. */ @@ -1206,13 +1206,14 @@ static void test_task_vma(void) goto out; /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. 
Read in small chunks - * to trigger seq_file corner cases. The expected output is much - * longer than 1kB, so the while loop will terminate. + * to trigger seq_file corner cases. */ len = 0; while (len < CMP_BUFFER_SIZE) { err = read_fd_into_buffer(iter_fd, task_vma_output + len, min(read_size, CMP_BUFFER_SIZE - len)); + if (!err) + break; if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n")) goto out; len += err; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c new file mode 100644 index 000000000000..380d7a2072e3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <network_helpers.h> +#include "bpf_loop.skel.h" + +static void check_nr_loops(struct bpf_loop *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.test_prog); + if (!ASSERT_OK_PTR(link, "link")) + return; + + /* test 0 loops */ + skel->bss->nr_loops = 0; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, + "0 loops"); + + /* test 500 loops */ + skel->bss->nr_loops = 500; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, + "500 loops"); + ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output"); + + /* test exceeding the max limit */ + skel->bss->nr_loops = -1; + + usleep(1); + + ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit"); + + bpf_link__destroy(link); +} + +static void check_callback_fn_stop(struct bpf_loop *skel) +{ + struct bpf_link *link; + + link = bpf_program__attach(skel->progs.test_prog); + if (!ASSERT_OK_PTR(link, "link")) + return; + + /* testing that loop is stopped when callback_fn returns 1 */ + skel->bss->nr_loops = 400; + skel->data->stop_index = 50; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1, + "nr_loops_returned"); + 
ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2, + "g_output"); + + bpf_link__destroy(link); +} + +static void check_null_callback_ctx(struct bpf_loop *skel) +{ + struct bpf_link *link; + + /* check that user is able to pass in a null callback_ctx */ + link = bpf_program__attach(skel->progs.prog_null_ctx); + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->nr_loops = 10; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops, + "nr_loops_returned"); + + bpf_link__destroy(link); +} + +static void check_invalid_flags(struct bpf_loop *skel) +{ + struct bpf_link *link; + + /* check that passing in non-zero flags returns -EINVAL */ + link = bpf_program__attach(skel->progs.prog_invalid_flags); + if (!ASSERT_OK_PTR(link, "link")) + return; + + usleep(1); + + ASSERT_EQ(skel->bss->err, -EINVAL, "err"); + + bpf_link__destroy(link); +} + +static void check_nested_calls(struct bpf_loop *skel) +{ + __u32 nr_loops = 100, nested_callback_nr_loops = 4; + struct bpf_link *link; + + /* check that nested calls are supported */ + link = bpf_program__attach(skel->progs.prog_nested_calls); + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->nr_loops = nr_loops; + skel->bss->nested_callback_nr_loops = nested_callback_nr_loops; + + usleep(1); + + ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops + * nested_callback_nr_loops, "nr_loops_returned"); + ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops + * nr_loops, "g_output"); + + bpf_link__destroy(link); +} + +void test_bpf_loop(void) +{ + struct bpf_loop *skel; + + skel = bpf_loop__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load")) + return; + + skel->bss->pid = getpid(); + + if (test__start_subtest("check_nr_loops")) + check_nr_loops(skel); + if (test__start_subtest("check_callback_fn_stop")) + check_callback_fn_stop(skel); + if (test__start_subtest("check_null_callback_ctx")) + check_null_callback_ctx(skel); + if 
(test__start_subtest("check_invalid_flags")) + check_invalid_flags(skel); + if (test__start_subtest("check_nested_calls")) + check_nested_calls(skel); + + bpf_loop__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index eb8eeebe6935..dbe56fa8582d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -48,7 +48,7 @@ void serial_test_bpf_obj_id(void) bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { now = time(NULL); - err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, + err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. It should never fail * to load. @@ -65,8 +65,8 @@ void serial_test_bpf_obj_id(void) if (CHECK_FAIL(err)) goto done; - prog = bpf_object__find_program_by_title(objs[i], - "raw_tp/sys_enter"); + prog = bpf_object__find_program_by_name(objs[i], + "test_obj_id"); if (CHECK_FAIL(!prog)) goto done; links[i] = bpf_program__attach(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 94e03df69d71..8f7a1cef7d87 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -217,21 +217,22 @@ static bool found; static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { - char *log_buf; + const char *prog_name, *log_buf; if (level != LIBBPF_WARN || - strcmp(format, "libbpf: \n%s\n")) { + !strstr(format, "-- BEGIN PROG LOAD LOG --")) { vprintf(format, args); return 0; } + prog_name = va_arg(args, char *); log_buf = va_arg(args, char *); if (!log_buf) goto out; if (err_str && strstr(log_buf, err_str) != NULL) found = true; out: - printf(format, log_buf); + printf(format, prog_name, log_buf); return 0; } diff --git 
a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 27f5d8ea7964..ff6cce9fef06 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -19,16 +19,28 @@ extern int extra_prog_load_log_flags; static int check_load(const char *file, enum bpf_prog_type type) { - struct bpf_prog_load_attr attr; struct bpf_object *obj = NULL; - int err, prog_fd; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.log_level = 4 | extra_prog_load_log_flags; - attr.prog_flags = BPF_F_TEST_RND_HI32; - err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + struct bpf_program *prog; + int err; + + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (err) + return err; + + prog = bpf_object__next_program(obj, NULL); + if (!prog) { + err = -ENOENT; + goto err_out; + } + + bpf_program__set_type(prog, type); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); + + err = bpf_object__load(obj); + +err_out: bpf_object__close(obj); return err; } @@ -115,6 +127,12 @@ void test_verif_scale_pyperf600() scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); } +void test_verif_scale_pyperf600_bpf_loop(void) +{ + /* use the bpf_loop helper*/ + scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + void test_verif_scale_pyperf600_nounroll() { /* no unroll at all. 
@@ -165,6 +183,12 @@ void test_verif_scale_strobemeta() scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); } +void test_verif_scale_strobemeta_bpf_loop(void) +{ + /* use the bpf_loop helper*/ + scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false); +} + void test_verif_scale_strobemeta_nounroll1() { /* no unroll, tiny loops */ diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ac596cb06e40..8ba53acf9eb4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -22,7 +22,6 @@ #include <bpf/libbpf.h> #include <bpf/btf.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "../test_btf.h" #include "test_progs.h" @@ -3939,6 +3938,23 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Invalid component_idx", }, +{ + .descr = "type_tag test #1", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [2] */ + BTF_PTR_ENC(2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ @@ -4046,20 +4062,40 @@ static void *btf_raw_create(const struct btf_header *hdr, next_str_idx < strs_cnt ? 
strs_idx[next_str_idx] : NULL; done: + free(strs_idx); if (err) { - if (raw_btf) - free(raw_btf); - if (strs_idx) - free(strs_idx); + free(raw_btf); return NULL; } return raw_btf; } +static int load_raw_btf(const void *raw_data, size_t raw_size) +{ + LIBBPF_OPTS(bpf_btf_load_opts, opts); + int btf_fd; + + if (always_log) { + opts.log_buf = btf_log_buf, + opts.log_size = BTF_LOG_BUF_SIZE, + opts.log_level = 1; + } + + btf_fd = bpf_btf_load(raw_data, raw_size, &opts); + if (btf_fd < 0 && !always_log) { + opts.log_buf = btf_log_buf, + opts.log_size = BTF_LOG_BUF_SIZE, + opts.log_level = 1; + btf_fd = bpf_btf_load(raw_data, raw_size, &opts); + } + + return btf_fd; +} + static void do_test_raw(unsigned int test_num) { struct btf_raw_test *test = &raw_tests[test_num - 1]; - struct bpf_create_map_attr create_attr = {}; + LIBBPF_OPTS(bpf_map_create_opts, opts); int map_fd = -1, btf_fd = -1; unsigned int raw_btf_size; struct btf_header *hdr; @@ -4085,16 +4121,14 @@ static void do_test_raw(unsigned int test_num) hdr->str_len = (int)hdr->str_len + test->str_len_delta; *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - always_log); + btf_fd = load_raw_btf(raw_btf, raw_btf_size); free(raw_btf); err = ((btf_fd < 0) != test->btf_load_err); if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", btf_fd, test->btf_load_err) || CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), - "expected err_str:%s", test->err_str)) { + "expected err_str:%s\n", test->err_str)) { err = -1; goto done; } @@ -4102,16 +4136,11 @@ static void do_test_raw(unsigned int test_num) if (err || btf_fd < 0) goto done; - create_attr.name = test->map_name; - create_attr.map_type = test->map_type; - create_attr.key_size = test->key_size; - create_attr.value_size = test->value_size; - create_attr.max_entries = test->max_entries; - create_attr.btf_fd = btf_fd; - create_attr.btf_key_type_id = test->key_type_id; - create_attr.btf_value_type_id = 
test->value_type_id; - - map_fd = bpf_create_map_xattr(&create_attr); + opts.btf_fd = btf_fd; + opts.btf_key_type_id = test->key_type_id; + opts.btf_value_type_id = test->value_type_id; + map_fd = bpf_map_create(test->map_type, test->map_name, + test->key_size, test->value_size, test->max_entries, &opts); err = ((map_fd < 0) != test->map_create_err); CHECK(err, "map_fd:%d test->map_create_err:%u", @@ -4217,9 +4246,7 @@ static int test_big_btf_info(unsigned int test_num) goto done; } - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - always_log); + btf_fd = load_raw_btf(raw_btf, raw_btf_size); if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; @@ -4275,7 +4302,7 @@ done: static int test_btf_id(unsigned int test_num) { const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; - struct bpf_create_map_attr create_attr = {}; + LIBBPF_OPTS(bpf_map_create_opts, opts); uint8_t *raw_btf = NULL, *user_btf[2] = {}; int btf_fd[2] = {-1, -1}, map_fd = -1; struct bpf_map_info map_info = {}; @@ -4305,9 +4332,7 @@ static int test_btf_id(unsigned int test_num) info[i].btf_size = raw_btf_size; } - btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - always_log); + btf_fd[0] = load_raw_btf(raw_btf, raw_btf_size); if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { err = -1; goto done; @@ -4340,16 +4365,11 @@ static int test_btf_id(unsigned int test_num) } /* Test btf members in struct bpf_map_info */ - create_attr.name = "test_btf_id"; - create_attr.map_type = BPF_MAP_TYPE_ARRAY; - create_attr.key_size = sizeof(int); - create_attr.value_size = sizeof(unsigned int); - create_attr.max_entries = 4; - create_attr.btf_fd = btf_fd[0]; - create_attr.btf_key_type_id = 1; - create_attr.btf_value_type_id = 2; - - map_fd = bpf_create_map_xattr(&create_attr); + opts.btf_fd = btf_fd[0]; + opts.btf_key_type_id = 1; + opts.btf_value_type_id = 2; + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_btf_id", + 
sizeof(int), sizeof(int), 4, &opts); if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; @@ -4442,9 +4462,7 @@ static void do_test_get_info(unsigned int test_num) goto done; } - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - always_log); + btf_fd = load_raw_btf(raw_btf, raw_btf_size); if (CHECK(btf_fd <= 0, "errno:%d", errno)) { err = -1; goto done; @@ -5138,7 +5156,7 @@ static void do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; - struct bpf_create_map_attr create_attr = {}; + LIBBPF_OPTS(bpf_map_create_opts, opts); bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; unsigned int key, nr_read_elems; @@ -5164,26 +5182,19 @@ static void do_test_pprint(int test_num) return; *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - always_log); + btf_fd = load_raw_btf(raw_btf, raw_btf_size); free(raw_btf); - if (CHECK(btf_fd < 0, "errno:%d", errno)) { + if (CHECK(btf_fd < 0, "errno:%d\n", errno)) { err = -1; goto done; } - create_attr.name = test->map_name; - create_attr.map_type = test->map_type; - create_attr.key_size = test->key_size; - create_attr.value_size = test->value_size; - create_attr.max_entries = test->max_entries; - create_attr.btf_fd = btf_fd; - create_attr.btf_key_type_id = test->key_type_id; - create_attr.btf_value_type_id = test->value_type_id; - - map_fd = bpf_create_map_xattr(&create_attr); + opts.btf_fd = btf_fd; + opts.btf_key_type_id = test->key_type_id; + opts.btf_value_type_id = test->value_type_id; + map_fd = bpf_map_create(test->map_type, test->map_name, + test->key_size, test->value_size, test->max_entries, &opts); if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; @@ -6538,9 +6549,7 @@ static void do_test_info_raw(unsigned int test_num) return; *btf_log_buf = '\0'; - btf_fd = bpf_load_btf(raw_btf, 
raw_btf_size, - btf_log_buf, BTF_LOG_BUF_SIZE, - always_log); + btf_fd = load_raw_btf(raw_btf, raw_btf_size); free(raw_btf); if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) { @@ -6629,7 +6638,7 @@ struct btf_dedup_test { struct btf_dedup_opts opts; }; -const struct btf_dedup_test dedup_tests[] = { +static struct btf_dedup_test dedup_tests[] = { { .descr = "dedup: unused strings filtering", @@ -6649,9 +6658,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0int\0long"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: strings deduplication", @@ -6674,9 +6680,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0int\0long int"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: struct example #1", @@ -6757,9 +6760,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: struct <-> fwd resolution w/ hash collision", @@ -6802,8 +6802,7 @@ const struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0s\0x"), }, .opts = { - .dont_resolve_fwds = false, - .dedup_table_size = 1, /* force hash collisions */ + .force_collisions = true, /* force hash collisions */ }, }, { @@ -6849,8 +6848,7 @@ const struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0s\0x"), }, .opts = { - .dont_resolve_fwds = false, - .dedup_table_size = 1, /* force hash collisions */ + .force_collisions = true, /* force hash collisions */ }, }, { @@ -6874,15 +6872,16 @@ const struct btf_dedup_test dedup_tests[] = { BTF_RESTRICT_ENC(8), /* [11] restrict */ BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* 
[16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ + BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), }, .expect = { .raw_types = { @@ -6903,18 +6902,16 @@ const struct btf_dedup_test dedup_tests[] = { BTF_RESTRICT_ENC(8), /* [11] restrict */ BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18), BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */ BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ + BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"), - }, - .opts = { - .dont_resolve_fwds = false, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), }, }, { @@ -6967,9 +6964,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0int\0some other int\0float"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: enum fwd resolution", @@ -7011,9 +7005,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: datasec and vars pass-through", @@ -7056,8 +7047,7 @@ const struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0.bss\0t"), }, .opts = { - .dont_resolve_fwds = false, - .dedup_table_size = 1 + .force_collisions = true }, }, { @@ -7101,9 +7091,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: func/func_param tags", @@ -7154,9 +7141,6 @@ const struct btf_dedup_test 
dedup_tests[] = { }, BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: struct/struct_member tags", @@ -7202,9 +7186,6 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"), }, - .opts = { - .dont_resolve_fwds = false, - }, }, { .descr = "dedup: typedef tags", @@ -7235,8 +7216,160 @@ const struct btf_dedup_test dedup_tests[] = { }, BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"), }, - .opts = { - .dont_resolve_fwds = false, +}, +{ + .descr = "dedup: btf_type_tag #1", + .input = { + .raw_types = { + /* ptr -> tag2 -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> tag2 -> tag1 -> int */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [5] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 5), /* [6] */ + BTF_PTR_ENC(6), /* [7] */ + /* ptr -> tag1 -> int */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [8] */ + BTF_PTR_ENC(8), /* [9] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0tag2"), + }, + .expect = { + .raw_types = { + /* ptr -> tag2 -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> tag1 -> int */ + BTF_PTR_ENC(2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0tag2"), + }, +}, +{ + .descr = "dedup: btf_type_tag #2", + .input = { + .raw_types = { + /* ptr -> tag2 -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> tag2 -> int */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ + BTF_PTR_ENC(5), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0tag2"), + }, + .expect = { + .raw_types = { + /* ptr -> tag2 -> tag1 -> int */ + 
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> tag2 -> int */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ + BTF_PTR_ENC(5), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0tag2"), + }, +}, +{ + .descr = "dedup: btf_type_tag #3", + .input = { + .raw_types = { + /* ptr -> tag2 -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> tag1 -> tag2 -> int */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */ + BTF_PTR_ENC(6), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0tag2"), + }, + .expect = { + .raw_types = { + /* ptr -> tag2 -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> tag1 -> tag2 -> int */ + BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */ + BTF_PTR_ENC(6), /* [7] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0tag2"), + }, +}, +{ + .descr = "dedup: btf_type_tag #4", + .input = { + .raw_types = { + /* ptr -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_PTR_ENC(2), /* [3] */ + /* ptr -> tag1 -> long */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */ + BTF_PTR_ENC(5), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1"), + }, + .expect = { + .raw_types = { + /* ptr -> tag1 -> int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_PTR_ENC(2), /* [3] */ + /* ptr -> tag1 -> long */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 
8), /* [4] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */ + BTF_PTR_ENC(5), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1"), + }, +}, +{ + .descr = "dedup: btf_type_tag #5, struct", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [4] */ + BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [5] */ + BTF_MEMBER_ENC(NAME_NTH(3), 4, BTF_MEMBER_OFFSET(0, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0t\0m"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */ + BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0tag1\0t\0m"), }, }, @@ -7257,6 +7390,7 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_FLOAT: + case BTF_KIND_TYPE_TAG: return base_size; case BTF_KIND_INT: return base_size + sizeof(__u32); @@ -7295,7 +7429,7 @@ static void dump_btf_strings(const char *strs, __u32 len) static void do_test_dedup(unsigned int test_num) { - const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + struct btf_dedup_test *test = &dedup_tests[test_num - 1]; __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; const struct btf_header *test_hdr, *expect_hdr; struct btf *test_btf = NULL, *expect_btf = NULL; @@ -7339,7 +7473,8 @@ static void do_test_dedup(unsigned int test_num) goto done; } - err = btf__dedup(test_btf, NULL, &test->opts); + test->opts.sz = sizeof(test->opts); + err = btf__dedup(test_btf, &test->opts); if (CHECK(err, "btf_dedup failed errno:%d", err)) { err = -1; goto done; diff --git 
a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c index 64554fd33547..90aac437576d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c @@ -92,7 +92,7 @@ struct s2 {\n\ int *f3;\n\ };\n\n", "c_dump"); - err = btf__dedup(btf2, NULL, NULL); + err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) goto cleanup; @@ -186,7 +186,7 @@ static void test_split_fwd_resolve() { "\t'f1' type_id=7 bits_offset=0\n" "\t'f2' type_id=9 bits_offset=64"); - err = btf__dedup(btf2, NULL, NULL); + err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) goto cleanup; @@ -283,7 +283,7 @@ static void test_split_struct_duped() { "[13] STRUCT 's3' size=8 vlen=1\n" "\t'f1' type_id=12 bits_offset=0"); - err = btf__dedup(btf2, NULL, NULL); + err = btf__dedup(btf2, NULL); if (!ASSERT_OK(err, "btf_dedup")) goto cleanup; @@ -314,6 +314,117 @@ cleanup: btf__free(btf1); } +static void btf_add_dup_struct_in_cu(struct btf *btf, int start_id) +{ +#define ID(n) (start_id + n) + btf__set_pointer_size(btf, 8); /* enforce 64-bit arch */ + + btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */ + + btf__add_struct(btf, "s", 8); /* [2] struct s { */ + btf__add_field(btf, "a", ID(3), 0, 0); /* struct anon a; */ + btf__add_field(btf, "b", ID(4), 0, 0); /* struct anon b; */ + /* } */ + + btf__add_struct(btf, "(anon)", 8); /* [3] struct anon { */ + btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */ + btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */ + /* } */ + + btf__add_struct(btf, "(anon)", 8); /* [4] struct anon { */ + btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */ + btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */ + /* } */ +#undef ID +} + +static void test_split_dup_struct_in_cu() +{ + struct btf *btf1, *btf2 = NULL; + int err; + + /* generate the base data.. 
*/ + btf1 = btf__new_empty(); + if (!ASSERT_OK_PTR(btf1, "empty_main_btf")) + return; + + btf_add_dup_struct_in_cu(btf1, 0); + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's' size=8 vlen=2\n" + "\t'a' type_id=3 bits_offset=0\n" + "\t'b' type_id=4 bits_offset=0", + "[3] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32", + "[4] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32"); + + /* ..dedup them... */ + err = btf__dedup(btf1, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + VALIDATE_RAW_BTF( + btf1, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's' size=8 vlen=2\n" + "\t'a' type_id=3 bits_offset=0\n" + "\t'b' type_id=3 bits_offset=0", + "[3] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32"); + + /* and add the same data on top of it */ + btf2 = btf__new_empty_split(btf1); + if (!ASSERT_OK_PTR(btf2, "empty_split_btf")) + goto cleanup; + + btf_add_dup_struct_in_cu(btf2, 3); + + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's' size=8 vlen=2\n" + "\t'a' type_id=3 bits_offset=0\n" + "\t'b' type_id=3 bits_offset=0", + "[3] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32", + "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[5] STRUCT 's' size=8 vlen=2\n" + "\t'a' type_id=6 bits_offset=0\n" + "\t'b' type_id=7 bits_offset=0", + "[6] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=4 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=32", + "[7] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=4 bits_offset=0\n" + "\t'f2' type_id=4 bits_offset=32"); + + err = btf__dedup(btf2, NULL); + if (!ASSERT_OK(err, "btf_dedup")) + goto cleanup; + + /* after dedup it 
should match the original data */ + VALIDATE_RAW_BTF( + btf2, + "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[2] STRUCT 's' size=8 vlen=2\n" + "\t'a' type_id=3 bits_offset=0\n" + "\t'b' type_id=3 bits_offset=0", + "[3] STRUCT '(anon)' size=8 vlen=2\n" + "\t'f1' type_id=1 bits_offset=0\n" + "\t'f2' type_id=1 bits_offset=32"); + +cleanup: + btf__free(btf2); + btf__free(btf1); +} + void test_btf_dedup_split() { if (test__start_subtest("split_simple")) @@ -322,4 +433,6 @@ void test_btf_dedup_split() test_split_struct_duped(); if (test__start_subtest("split_fwd_resolve")) test_split_fwd_resolve(); + if (test__start_subtest("split_dup_struct_in_cu")) + test_split_dup_struct_in_cu(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index aa76360d8f49..9e26903f9170 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -13,25 +13,23 @@ static struct btf_dump_test_case { const char *name; const char *file; bool known_ptr_sz; - struct btf_dump_opts opts; } btf_dump_test_cases[] = { - {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}}, - {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}}, - {"btf_dump: padding", "btf_dump_test_case_padding", true, {}}, - {"btf_dump: packing", "btf_dump_test_case_packing", true, {}}, - {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}}, - {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}}, - {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}}, + {"btf_dump: syntax", "btf_dump_test_case_syntax", true}, + {"btf_dump: ordering", "btf_dump_test_case_ordering", false}, + {"btf_dump: padding", "btf_dump_test_case_padding", true}, + {"btf_dump: packing", "btf_dump_test_case_packing", true}, + {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true}, + {"btf_dump: multidim", "btf_dump_test_case_multidim", false}, + 
{"btf_dump: namespacing", "btf_dump_test_case_namespacing", false}, }; -static int btf_dump_all_types(const struct btf *btf, - const struct btf_dump_opts *opts) +static int btf_dump_all_types(const struct btf *btf, void *ctx) { size_t type_cnt = btf__type_cnt(btf); struct btf_dump *d; int err = 0, id; - d = btf_dump__new(btf, NULL, opts, btf_dump_printf); + d = btf_dump__new(btf, btf_dump_printf, ctx, NULL); err = libbpf_get_error(d); if (err) return err; @@ -88,8 +86,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) goto done; } - t->opts.ctx = f; - err = btf_dump_all_types(btf, &t->opts); + err = btf_dump_all_types(btf, f); fclose(f); close(fd); if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) { @@ -137,7 +134,6 @@ static void test_btf_dump_incremental(void) { struct btf *btf = NULL; struct btf_dump *d = NULL; - struct btf_dump_opts opts; int id, err, i; dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); @@ -146,8 +142,7 @@ static void test_btf_dump_incremental(void) btf = btf__new_empty(); if (!ASSERT_OK_PTR(btf, "new_empty")) goto err_out; - opts.ctx = dump_buf_file; - d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL); if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) goto err_out; @@ -328,7 +323,7 @@ static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, char *str) { #ifdef __SIZEOF_INT128__ - __int128 i = 0xffffffffffffffff; + unsigned __int128 i = 0xffffffffffffffff; /* this dance is required because we cannot directly initialize * a 128-bit value to anything larger than a 64-bit value. @@ -761,7 +756,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, /* overflow bpf_sock_ops struct with final element nonzero/zero. * Regardless of the value of the final field, we don't have all the * data we need to display it, so we should trigger an overflow. 
- * In other words oveflow checking should trump "is field zero?" + * In other words overflow checking should trump "is field zero?" * checks because if we've overflowed, it shouldn't matter what the * field is - we can't trust its value so shouldn't display it. */ @@ -814,26 +809,28 @@ static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, static void test_btf_dump_datasec_data(char *str) { - struct btf *btf = btf__parse("xdping_kern.o", NULL); - struct btf_dump_opts opts = { .ctx = str }; + struct btf *btf; char license[4] = "GPL"; struct btf_dump *d; + btf = btf__parse("xdping_kern.o", NULL); if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) return; - d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); if (!ASSERT_OK_PTR(d, "could not create BTF dump")) - return; + goto out; test_btf_datasec(btf, d, str, "license", "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];", license, sizeof(license)); +out: + btf_dump__free(d); + btf__free(btf); } void test_btf_dump() { char str[STRSIZE]; - struct btf_dump_opts opts = { .ctx = str }; struct btf_dump *d; struct btf *btf; int i; @@ -853,7 +850,7 @@ void test_btf_dump() { if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) return; - d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + d = btf_dump__new(btf, btf_dump_snprintf, str, NULL); if (!ASSERT_OK_PTR(d, "could not create BTF dump")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c index b1ffe61f2aa9..eef1158676ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_split.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c @@ -13,7 +13,6 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args) } void test_btf_split() { - struct btf_dump_opts opts; struct btf_dump *d = NULL; const struct btf_type *t; struct btf *btf1, *btf2; @@ -68,8 +67,7 @@ void test_btf_split() { 
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) return; - opts.ctx = dump_buf_file; - d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf); + d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL); if (!ASSERT_OK_PTR(d, "btf_dump__new")) goto cleanup; for (i = 1; i < btf__type_cnt(btf2); i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c index 91821f42714d..88d63e23e35f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c @@ -1,20 +1,50 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ #include <test_progs.h> -#include "tag.skel.h" +#include "btf_decl_tag.skel.h" -void test_btf_tag(void) +/* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */ +struct btf_type_tag_test { + int **p; +}; +#include "btf_type_tag.skel.h" + +static void test_btf_decl_tag(void) +{ + struct btf_decl_tag *skel; + + skel = btf_decl_tag__open_and_load(); + if (!ASSERT_OK_PTR(skel, "btf_decl_tag")) + return; + + if (skel->rodata->skip_tests) { + printf("%s:SKIP: btf_decl_tag attribute not supported", __func__); + test__skip(); + } + + btf_decl_tag__destroy(skel); +} + +static void test_btf_type_tag(void) { - struct tag *skel; + struct btf_type_tag *skel; - skel = tag__open_and_load(); - if (!ASSERT_OK_PTR(skel, "btf_tag")) + skel = btf_type_tag__open_and_load(); + if (!ASSERT_OK_PTR(skel, "btf_type_tag")) return; if (skel->rodata->skip_tests) { - printf("%s:SKIP: btf_tag attribute not supported", __func__); + printf("%s:SKIP: btf_type_tag attribute not supported", __func__); test__skip(); } - tag__destroy(skel); + btf_type_tag__destroy(skel); +} + +void test_btf_tag(void) +{ + if (test__start_subtest("btf_decl_tag")) + test_btf_decl_tag(); + if (test__start_subtest("btf_type_tag")) + test_btf_type_tag(); } diff --git 
a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index b912eeb0b6b4..addf99c05896 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -297,6 +297,16 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx"); ASSERT_STREQ(btf_type_raw_dump(btf, 19), "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump"); + + /* TYPE_TAG */ + id = btf__add_type_tag(btf, "tag1", 1); + ASSERT_EQ(id, 20, "tag_id"); + t = btf__type_by_id(btf, 20); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value"); + ASSERT_EQ(btf_kind(t), BTF_KIND_TYPE_TAG, "tag_kind"); + ASSERT_EQ(t->type, 1, "tag_type"); + ASSERT_STREQ(btf_type_raw_dump(btf, 20), + "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); } static void test_btf_add() @@ -337,7 +347,8 @@ static void test_btf_add() "[17] DATASEC 'datasec1' size=12 vlen=1\n" "\ttype_id=1 offset=4 size=8", "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", - "[19] DECL_TAG 'tag2' type_id=14 component_idx=1"); + "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", + "[20] TYPE_TAG 'tag1' type_id=1"); btf__free(btf); } @@ -359,7 +370,7 @@ static void test_btf_add_btf() gen_btf(btf2); id = btf__add_btf(btf1, btf2); - if (!ASSERT_EQ(id, 20, "id")) + if (!ASSERT_EQ(id, 21, "id")) goto cleanup; VALIDATE_RAW_BTF( @@ -391,35 +402,37 @@ static void test_btf_add_btf() "\ttype_id=1 offset=4 size=8", "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", + "[20] TYPE_TAG 'tag1' type_id=1", /* types appended from the second BTF */ - "[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[21] PTR '(anon)' type_id=20", - "[22] CONST '(anon)' type_id=24", - "[23] VOLATILE '(anon)' type_id=22", - "[24] RESTRICT '(anon)' type_id=23", - "[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10", - "[26] STRUCT 's1' 
size=8 vlen=2\n" - "\t'f1' type_id=20 bits_offset=0\n" - "\t'f2' type_id=20 bits_offset=32 bitfield_size=16", - "[27] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=20 bits_offset=0 bitfield_size=16", - "[28] ENUM 'e1' size=4 vlen=2\n" + "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[22] PTR '(anon)' type_id=21", + "[23] CONST '(anon)' type_id=25", + "[24] VOLATILE '(anon)' type_id=23", + "[25] RESTRICT '(anon)' type_id=24", + "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", + "[27] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=21 bits_offset=0\n" + "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", + "[28] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", + "[29] ENUM 'e1' size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", - "[29] FWD 'struct_fwd' fwd_kind=struct", - "[30] FWD 'union_fwd' fwd_kind=union", - "[31] ENUM 'enum_fwd' size=4 vlen=0", - "[32] TYPEDEF 'typedef1' type_id=20", - "[33] FUNC 'func1' type_id=34 linkage=global", - "[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n" - "\t'p1' type_id=20\n" - "\t'p2' type_id=21", - "[35] VAR 'var1' type_id=20, linkage=global-alloc", - "[36] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=20 offset=4 size=8", - "[37] DECL_TAG 'tag1' type_id=35 component_idx=-1", - "[38] DECL_TAG 'tag2' type_id=33 component_idx=1"); + "[30] FWD 'struct_fwd' fwd_kind=struct", + "[31] FWD 'union_fwd' fwd_kind=union", + "[32] ENUM 'enum_fwd' size=4 vlen=0", + "[33] TYPEDEF 'typedef1' type_id=21", + "[34] FUNC 'func1' type_id=35 linkage=global", + "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" + "\t'p1' type_id=21\n" + "\t'p2' type_id=22", + "[36] VAR 'var1' type_id=21, linkage=global-alloc", + "[37] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=21 offset=4 size=8", + "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", + "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", + "[40] TYPE_TAG 'tag1' type_id=21"); cleanup: btf__free(btf1); diff --git 
a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c index 5de485c7370f..858916d11e2e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -16,7 +16,7 @@ static int prog_load(void) }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 731bea84d8ed..d3e8f729c623 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -15,22 +15,22 @@ static int prog_load_cnt(int verdict, int val) int cgroup_storage_fd, percpu_cgroup_storage_fd; if (map_fd < 0) - map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); if (map_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); return -1; } - cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, - sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + cgroup_storage_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, + sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL); if (cgroup_storage_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); return -1; } - percpu_cgroup_storage_fd = bpf_create_map( - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, - sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + percpu_cgroup_storage_fd = bpf_map_create( + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL, + sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL); if (percpu_cgroup_storage_fd < 0) { printf("failed to create map '%s'\n", strerror(errno)); return -1; @@ -66,7 +66,7 @@ static 
int prog_load_cnt(int verdict, int val) size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); int ret; - ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c index 10d3c33821a7..356547e849e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -18,7 +18,7 @@ static int prog_load(int verdict) }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); } diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c index 9229db2f5ca5..9c4325f4aef2 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -51,24 +51,25 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) bool v4 = family == AF_INET; __u16 expected_local_port = v4 ? 22222 : 22223; __u16 expected_peer_port = 60000; - struct bpf_prog_load_attr attr = { - .file = v4 ? "./connect_force_port4.o" : - "./connect_force_port6.o", - }; struct bpf_program *prog; struct bpf_object *obj; - int xlate_fd, fd, err; + const char *obj_file = v4 ? 
"connect_force_port4.o" : "connect_force_port6.o"; + int fd, err; __u32 duration = 0; - err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd); - if (err) { - log_err("Failed to load BPF object"); + obj = bpf_object__open_file(obj_file, NULL); + if (!ASSERT_OK_PTR(obj, "bpf_obj_open")) return -1; + + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "bpf_obj_load")) { + err = -EIO; + goto close_bpf_object; } - prog = bpf_object__find_program_by_title(obj, v4 ? - "cgroup/connect4" : - "cgroup/connect6"); + prog = bpf_object__find_program_by_name(obj, v4 ? + "connect4" : + "connect6"); if (CHECK(!prog, "find_prog", "connect prog not found\n")) { err = -EIO; goto close_bpf_object; @@ -82,9 +83,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - prog = bpf_object__find_program_by_title(obj, v4 ? - "cgroup/getpeername4" : - "cgroup/getpeername6"); + prog = bpf_object__find_program_by_name(obj, v4 ? + "getpeername4" : + "getpeername6"); if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) { err = -EIO; goto close_bpf_object; @@ -98,9 +99,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - prog = bpf_object__find_program_by_title(obj, v4 ? - "cgroup/getsockname4" : - "cgroup/getsockname6"); + prog = bpf_object__find_program_by_name(obj, v4 ? 
+ "getsockname4" : + "getsockname6"); if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) { err = -EIO; goto close_bpf_object; diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c new file mode 100644 index 000000000000..561c5185d886 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "test_progs.h" +#include "core_kern.lskel.h" + +void test_core_kern_lskel(void) +{ + struct core_kern_lskel *skel; + + skel = core_kern_lskel__open_and_load(); + ASSERT_OK_PTR(skel, "open_and_load"); + core_kern_lskel__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 55ec85ba7375..b8bdd1c3efca 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -10,7 +10,7 @@ static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) -#define MODULES_CASE(name, sec_name, tp_name) { \ +#define MODULES_CASE(name, pg_name, tp_name) { \ .case_name = name, \ .bpf_obj_file = "test_core_reloc_module.o", \ .btf_src_file = NULL, /* find in kernel module BTFs */ \ @@ -28,7 +28,7 @@ static int duration = 0; .comm_len = sizeof("test_progs"), \ }, \ .output_len = sizeof(struct core_reloc_module_output), \ - .prog_sec_name = sec_name, \ + .prog_name = pg_name, \ .raw_tp_name = tp_name, \ .trigger = __trigger_module_test_read, \ .needs_testmod = true, \ @@ -43,7 +43,9 @@ static int duration = 0; #define FLAVORS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_flavors.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_flavors" \ #define FLAVORS_CASE(name) { \ 
FLAVORS_CASE_COMMON(name), \ @@ -66,7 +68,9 @@ static int duration = 0; #define NESTING_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_nesting.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_nesting" \ #define NESTING_CASE(name) { \ NESTING_CASE_COMMON(name), \ @@ -91,7 +95,9 @@ static int duration = 0; #define ARRAYS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_arrays.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_arrays" \ #define ARRAYS_CASE(name) { \ ARRAYS_CASE_COMMON(name), \ @@ -123,7 +129,9 @@ static int duration = 0; #define PRIMITIVES_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_primitives.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_primitives" \ #define PRIMITIVES_CASE(name) { \ PRIMITIVES_CASE_COMMON(name), \ @@ -158,6 +166,8 @@ static int duration = 0; .e = 5, .f = 6, .g = 7, .h = 8, \ }, \ .output_len = sizeof(struct core_reloc_mods_output), \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_mods", \ } #define PTR_AS_ARR_CASE(name) { \ @@ -174,6 +184,8 @@ static int duration = 0; .a = 3, \ }, \ .output_len = sizeof(struct core_reloc_ptr_as_arr), \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_ptr_as_arr", \ } #define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ @@ -190,7 +202,9 @@ static int duration = 0; #define INTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_ints.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_ints" #define INTS_CASE(name) { 
\ INTS_CASE_COMMON(name), \ @@ -208,7 +222,9 @@ static int duration = 0; #define FIELD_EXISTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_existence.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_existence" #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \ .case_name = test_name_prefix#name, \ @@ -223,6 +239,8 @@ static int duration = 0; .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ __VA_ARGS__, \ .output_len = sizeof(struct core_reloc_bitfields_output), \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ @@ -231,7 +249,7 @@ static int duration = 0; .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ __VA_ARGS__, \ .output_len = sizeof(struct core_reloc_bitfields_output), \ - .prog_sec_name = "tp_btf/sys_enter", \ + .prog_name = "test_core_bitfields_direct", \ } @@ -239,17 +257,21 @@ static int duration = 0; BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ "probed:", name), \ .fails = true, \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ - .prog_sec_name = "tp_btf/sys_enter", \ .fails = true, \ + .prog_name = "test_core_bitfields_direct", \ } #define SIZE_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_size.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_size" #define SIZE_OUTPUT_DATA(type) \ STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ @@ -277,8 +299,10 @@ static int duration = 0; #define TYPE_BASED_CASE_COMMON(name) \ .case_name = #name, \ - .bpf_obj_file = "test_core_reloc_type_based.o", \ - .btf_src_file 
= "btf__core_reloc_" #name ".o" \ + .bpf_obj_file = "test_core_reloc_type_based.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_type_based" #define TYPE_BASED_CASE(name, ...) { \ TYPE_BASED_CASE_COMMON(name), \ @@ -295,7 +319,9 @@ static int duration = 0; #define TYPE_ID_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_type_id.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_type_id" #define TYPE_ID_CASE(name, setup_fn) { \ TYPE_ID_CASE_COMMON(name), \ @@ -312,7 +338,9 @@ static int duration = 0; #define ENUMVAL_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = "test_core_reloc_enumval.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o" \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_enumval" #define ENUMVAL_CASE(name, ...) 
{ \ ENUMVAL_CASE_COMMON(name), \ @@ -342,7 +370,7 @@ struct core_reloc_test_case { bool fails; bool needs_testmod; bool relaxed_core_relocs; - const char *prog_sec_name; + const char *prog_name; const char *raw_tp_name; setup_test_fn setup; trigger_test_fn trigger; @@ -433,7 +461,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) static int setup_type_id_case_success(struct core_reloc_test_case *test) { struct core_reloc_type_id_output *exp = (void *)test->output; - struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + struct btf *targ_btf; int err; err = setup_type_id_case_local(test); @@ -497,11 +525,13 @@ static struct core_reloc_test_case test_cases[] = { .comm_len = sizeof("test_progs"), }, .output_len = sizeof(struct core_reloc_kernel_output), + .raw_tp_name = "sys_enter", + .prog_name = "test_core_kernel", }, /* validate we can find kernel module BTF types for relocs/attach */ - MODULES_CASE("module_probed", "raw_tp/bpf_testmod_test_read", "bpf_testmod_test_read"), - MODULES_CASE("module_direct", "tp_btf/bpf_testmod_test_read", NULL), + MODULES_CASE("module_probed", "test_core_module_probed", "bpf_testmod_test_read"), + MODULES_CASE("module_direct", "test_core_module_direct", NULL), /* validate BPF program can use multiple flavors to match against * single target BTF type @@ -580,6 +610,8 @@ static struct core_reloc_test_case test_cases[] = { .c = 0, /* BUG in clang, should be 3 */ }, .output_len = sizeof(struct core_reloc_misc_output), + .raw_tp_name = "sys_enter", + .prog_name = "test_core_misc", }, /* validate field existence checks */ @@ -848,14 +880,9 @@ void test_core_reloc(void) if (!ASSERT_OK_PTR(obj, "obj_open")) goto cleanup; - probe_name = "raw_tracepoint/sys_enter"; - tp_name = "sys_enter"; - if (test_case->prog_sec_name) { - probe_name = test_case->prog_sec_name; - tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ - } - - prog = bpf_object__find_program_by_title(obj, probe_name); + probe_name = 
test_case->prog_name; + tp_name = test_case->raw_tp_name; /* NULL for tp_btf */ + prog = bpf_object__find_program_by_name(obj, probe_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name)) goto cleanup; @@ -881,7 +908,8 @@ void test_core_reloc(void) data = mmap_data; memset(mmap_data, 0, sizeof(*data)); - memcpy(data->in, test_case->input, test_case->input_len); + if (test_case->input_len) + memcpy(data->in, test_case->input, test_case->input_len); data->my_pid_tgid = my_pid_tgid; link = bpf_program__attach_raw_tracepoint(prog, tp_name); diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 0a577a248d34..911345c526e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -9,6 +9,8 @@ #define MAX_FILES 7 #include "test_d_path.skel.h" +#include "test_d_path_check_rdonly_mem.skel.h" +#include "test_d_path_check_types.skel.h" static int duration; @@ -99,7 +101,7 @@ out_close: return ret; } -void test_d_path(void) +static void test_d_path_basic(void) { struct test_d_path__bss *bss; struct test_d_path *skel; @@ -155,3 +157,35 @@ void test_d_path(void) cleanup: test_d_path__destroy(skel); } + +static void test_d_path_check_rdonly_mem(void) +{ + struct test_d_path_check_rdonly_mem *skel; + + skel = test_d_path_check_rdonly_mem__open_and_load(); + ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem"); + + test_d_path_check_rdonly_mem__destroy(skel); +} + +static void test_d_path_check_types(void) +{ + struct test_d_path_check_types *skel; + + skel = test_d_path_check_types__open_and_load(); + ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type"); + + test_d_path_check_types__destroy(skel); +} + +void test_d_path(void) +{ + if (test__start_subtest("basic")) + test_d_path_basic(); + + if (test__start_subtest("check_rdonly_mem")) + test_d_path_check_rdonly_mem(); + + if (test__start_subtest("check_alloc_mem")) + 
test_d_path_check_types(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/exhandler.c b/tools/testing/selftests/bpf/prog_tests/exhandler.c new file mode 100644 index 000000000000..118bb182ee20 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/exhandler.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021, Oracle and/or its affiliates. */ + +#include <test_progs.h> + +/* Test that verifies exception handling is working. fork() + * triggers task_newtask tracepoint; that new task will have a + * NULL pointer task_works, and the associated task->task_works->func + * should not be NULL if task_works itself is non-NULL. + * + * So to verify exception handling we want to see a NULL task_works + * and task_works->func; if we see this we can conclude that the + * exception handler ran when we attempted to dereference task->task_works + * and zeroed the destination register. + */ +#include "exhandler_kern.skel.h" + +void test_exhandler(void) +{ + int err = 0, duration = 0, status; + struct exhandler_kern *skel; + pid_t cpid; + + skel = exhandler_kern__open_and_load(); + if (CHECK(!skel, "skel_load", "skeleton failed: %d\n", err)) + goto cleanup; + + skel->bss->test_pid = getpid(); + + err = exhandler_kern__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto cleanup; + cpid = fork(); + if (!ASSERT_GT(cpid, -1, "fork failed")) + goto cleanup; + if (cpid == 0) + _exit(0); + waitpid(cpid, &status, 0); + + ASSERT_NEQ(skel->bss->exception_triggered, 0, "verify exceptions occurred"); +cleanup: + exhandler_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 9cff14a23bb7..c52f99f6a909 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -65,7 +65,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, int err, tgt_fd, i; struct btf *btf; - err = 
bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); if (!ASSERT_OK(err, "tgt_prog_load")) return; @@ -101,6 +101,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, for (i = 0; i < prog_cnt; i++) { struct bpf_link_info link_info; + struct bpf_program *pos; + const char *pos_sec_name; char *tgt_name; __s32 btf_id; @@ -109,7 +111,14 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC); - prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); + prog[i] = NULL; + bpf_object__for_each_program(pos, obj) { + pos_sec_name = bpf_program__section_name(pos); + if (pos_sec_name && !strcmp(pos_sec_name, prog_name[i])) { + prog[i] = pos; + break; + } + } if (!ASSERT_OK_PTR(prog[i], prog_name[i])) goto close_prog; @@ -211,8 +220,8 @@ static void test_func_replace_verify(void) static int test_second_attach(struct bpf_object *obj) { - const char *prog_name = "freplace/get_constant"; - const char *tgt_name = prog_name + 9; /* cut off freplace/ */ + const char *prog_name = "security_new_get_constant"; + const char *tgt_name = "get_constant"; const char *tgt_obj_file = "./test_pkt_access.o"; struct bpf_program *prog = NULL; struct bpf_object *tgt_obj; @@ -220,11 +229,11 @@ static int test_second_attach(struct bpf_object *obj) struct bpf_link *link; int err = 0, tgt_fd; - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) return -ENOENT; - err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, &tgt_obj, &tgt_fd); if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", tgt_obj_file, err, errno)) @@ -274,7 +283,7 @@ static void test_fmod_ret_freplace(void) __u32 duration = 0; int err, 
pkt_fd, attach_prog_fd; - err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_test_load(tgt_name, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); /* the target prog should load fine */ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", @@ -341,7 +350,7 @@ static void test_obj_load_failure_common(const char *obj_file, int err, pkt_fd; __u32 duration = 0; - err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, &pkt_obj, &pkt_fd); /* the target prog should load fine */ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index 7c9b62e971f1..e4cede6b4b2d 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -20,34 +20,33 @@ void test_fexit_stress(void) BPF_EXIT_INSN(), }; - struct bpf_load_program_attr load_attr = { - .prog_type = BPF_PROG_TYPE_TRACING, - .license = "GPL", - .insns = trace_program, - .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .expected_attach_type = BPF_TRACE_FEXIT, - }; + .log_buf = error, + .log_size = sizeof(error), + ); const struct bpf_insn skb_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - struct bpf_load_program_attr skb_load_attr = { - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .license = "GPL", - .insns = skb_program, - .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), - }; + LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, + .log_buf = error, + .log_size = sizeof(error), + ); err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", - load_attr.expected_attach_type); + trace_opts.expected_attach_type); if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) goto out; - load_attr.attach_btf_id = err; + trace_opts.attach_btf_id = err; for (i = 0; i < CNT; i++) { 
- fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL", + trace_program, + sizeof(trace_program) / sizeof(struct bpf_insn), + &trace_opts); if (CHECK(fexit_fd[i] < 0, "fexit loaded", "failed: %d errno %d\n", fexit_fd[i], errno)) goto out; @@ -57,7 +56,9 @@ void test_fexit_stress(void) goto out; } - filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", + skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), + &skb_opts); if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", filter_fd, errno)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c new file mode 100644 index 000000000000..b74b3c0c555a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include <sys/types.h> +#include <unistd.h> +#include "find_vma.skel.h" +#include "find_vma_fail1.skel.h" +#include "find_vma_fail2.skel.h" + +static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret) +{ + ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec"); + ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret"); + ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret"); + ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs"); + + skel->bss->found_vm_exec = 0; + skel->data->find_addr_ret = -1; + skel->data->find_zero_ret = -1; + skel->bss->d_iname[0] = 0; +} + +static int open_pe(void) +{ + struct perf_event_attr attr = {0}; + int pfd; + + /* create perf event */ + attr.size = sizeof(attr); + attr.type = PERF_TYPE_HARDWARE; + attr.config = PERF_COUNT_HW_CPU_CYCLES; + attr.freq = 1; + attr.sample_freq = 4000; + pfd = syscall(__NR_perf_event_open, 
&attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC); + + return pfd >= 0 ? pfd : -errno; +} + +static void test_find_vma_pe(struct find_vma *skel) +{ + struct bpf_link *link = NULL; + volatile int j = 0; + int pfd, i; + + pfd = open_pe(); + if (pfd < 0) { + if (pfd == -ENOENT || pfd == -EOPNOTSUPP) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (!ASSERT_GE(pfd, 0, "perf_event_open")) + goto cleanup; + } + + link = bpf_program__attach_perf_event(skel->progs.handle_pe, pfd); + if (!ASSERT_OK_PTR(link, "attach_perf_event")) + goto cleanup; + + for (i = 0; i < 1000000; ++i) + ++j; + + test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */); +cleanup: + bpf_link__destroy(link); + close(pfd); +} + +static void test_find_vma_kprobe(struct find_vma *skel) +{ + int err; + + err = find_vma__attach(skel); + if (!ASSERT_OK(err, "get_branch_snapshot__attach")) + return; + + getpgid(skel->bss->target_pid); + test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */); +} + +static void test_illegal_write_vma(void) +{ + struct find_vma_fail1 *skel; + + skel = find_vma_fail1__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "find_vma_fail1__open_and_load")) + find_vma_fail1__destroy(skel); +} + +static void test_illegal_write_task(void) +{ + struct find_vma_fail2 *skel; + + skel = find_vma_fail2__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "find_vma_fail2__open_and_load")) + find_vma_fail2__destroy(skel); +} + +void serial_test_find_vma(void) +{ + struct find_vma *skel; + + skel = find_vma__open_and_load(); + if (!ASSERT_OK_PTR(skel, "find_vma__open_and_load")) + return; + + skel->bss->target_pid = getpid(); + skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe; + + test_find_vma_pe(skel); + usleep(100000); /* allow the irq_work to finish */ + test_find_vma_kprobe(skel); + + find_vma__destroy(skel); + test_illegal_write_vma(); + test_illegal_write_task(); +} diff --git 
a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c index 6093728497c7..93ac3f28226c 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -30,7 +30,7 @@ void serial_test_flow_dissector_load_bytes(void) /* make sure bpf_skb_load_bytes is not allowed from skb-less context */ - fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); CHECK(fd < 0, "flow_dissector-bpf_skb_load_bytes-load", diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index f0c6c226aba8..7c79462d2702 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -47,9 +47,9 @@ static int load_prog(enum bpf_prog_type type) }; int fd; - fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + fd = bpf_test_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); if (CHECK_FAIL(fd < 0)) - perror("bpf_load_program"); + perror("bpf_test_load_program"); return fd; } diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c new file mode 100644 index 000000000000..85c427119fe9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "get_func_args_test.skel.h" + +void test_get_func_args_test(void) +{ + struct get_func_args_test *skel = NULL; + __u32 duration = 0, retval; + int err, prog_fd; + + skel = get_func_args_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load")) + return; + + err = 
get_func_args_test__attach(skel); + if (!ASSERT_OK(err, "get_func_args_test__attach")) + goto cleanup; + + /* This runs bpf_fentry_test* functions and triggers + * fentry/fexit programs. + */ + prog_fd = bpf_program__fd(skel->progs.test1); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 0, "test_run"); + + /* This runs bpf_modify_return_test function and triggers + * fmod_ret_test and fexit_test programs. + */ + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, + NULL, NULL, &retval, &duration); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(retval, 1234, "test_run"); + + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); + ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); + ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); + ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); + +cleanup: + get_func_args_test__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index 522237aa4470..e834a01de16a 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -24,13 +24,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) { bool good_kern_stack = false, good_user_stack = false; const char *nonjit_func = "___bpf_prog_run"; - struct get_stack_trace_t *e = data; + /* perfbuf-submitted data is 4-byte aligned, but we need 8-byte + * alignment, so copy data into a local variable, for simplicity + */ + struct get_stack_trace_t e; int i, num_stack; static __u64 cnt; struct ksym *ks; cnt++; + memset(&e, 0, sizeof(e)); + memcpy(&e, data, size <= sizeof(e) ? 
size : sizeof(e)); + if (size < sizeof(struct get_stack_trace_t)) { __u64 *raw_data = data; bool found = false; @@ -57,19 +63,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) good_user_stack = true; } } else { - num_stack = e->kern_stack_size / sizeof(__u64); + num_stack = e.kern_stack_size / sizeof(__u64); if (env.jit_enabled) { good_kern_stack = num_stack > 0; } else { for (i = 0; i < num_stack; i++) { - ks = ksym_search(e->kern_stack[i]); + ks = ksym_search(e.kern_stack[i]); if (ks && (strcmp(ks->name, nonjit_func) == 0)) { good_kern_stack = true; break; } } } - if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) + if (e.user_stack_size > 0 && e.user_stack_buildid_size > 0) good_user_stack = true; } @@ -83,9 +89,8 @@ void test_get_stack_raw_tp(void) { const char *file = "./test_get_stack_rawtp.o"; const char *file_err = "./test_get_stack_rawtp_err.o"; - const char *prog_name = "raw_tracepoint/sys_enter"; + const char *prog_name = "bpf_prog1"; int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; - struct perf_buffer_opts pb_opts = {}; struct perf_buffer *pb = NULL; struct bpf_link *link = NULL; struct timespec tv = {0, 10}; @@ -94,15 +99,15 @@ void test_get_stack_raw_tp(void) struct bpf_map *map; cpu_set_t cpu_set; - err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_test_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) goto close_prog; @@ -124,8 +129,8 @@ void 
test_get_stack_raw_tp(void) if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; - pb_opts.sample_cb = get_stack_print_output; - pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(map), 8, get_stack_print_output, + NULL, NULL, NULL); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index afd8639f9a94..9da131b32e13 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -136,7 +136,7 @@ void test_global_data(void) struct bpf_object *obj; int err, prog_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK(err, "load program", "error %d loading %s\n", err, file)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c index 8bcc2869102f..93a2439237b0 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c +++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c @@ -44,7 +44,7 @@ void test_global_func_args(void) struct bpf_object *obj; int err, prog_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK(err, "load program", "error %d loading %s\n", err, file)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index 01e51d16c8b8..ce10d2fc3a6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include <network_helpers.h> +#include "kfree_skb.skel.h" struct meta { int ifindex; @@ -58,63 +59,43 @@ void 
serial_test_kfree_skb(void) .ctx_in = &skb, .ctx_size_in = sizeof(skb), }; - struct bpf_prog_load_attr attr = { - .file = "./kfree_skb.o", - }; - - struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL; - struct bpf_map *perf_buf_map, *global_data; - struct bpf_program *prog, *fentry, *fexit; - struct bpf_object *obj, *obj2 = NULL; - struct perf_buffer_opts pb_opts = {}; + struct kfree_skb *skel = NULL; + struct bpf_link *link; + struct bpf_object *obj; struct perf_buffer *pb = NULL; - int err, kfree_skb_fd; + int err; bool passed = false; __u32 duration = 0; const int zero = 0; bool test_ok[2]; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &tattr.prog_fd); if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) return; - err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd); - if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) - goto close_prog; - - prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); - if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) - goto close_prog; - fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans"); - if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n")) - goto close_prog; - fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans"); - if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) - goto close_prog; - - global_data = bpf_object__find_map_by_name(obj2, ".bss"); - if (CHECK(!global_data, "find global data", "not found\n")) + skel = kfree_skb__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kfree_skb_skel")) goto close_prog; - link = bpf_program__attach_raw_tracepoint(prog, NULL); + link = bpf_program__attach_raw_tracepoint(skel->progs.trace_kfree_skb, NULL); if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; - link_fentry = bpf_program__attach_trace(fentry); - if 
(!ASSERT_OK_PTR(link_fentry, "attach fentry")) - goto close_prog; - link_fexit = bpf_program__attach_trace(fexit); - if (!ASSERT_OK_PTR(link_fexit, "attach fexit")) + skel->links.trace_kfree_skb = link; + + link = bpf_program__attach_trace(skel->progs.fentry_eth_type_trans); + if (!ASSERT_OK_PTR(link, "attach fentry")) goto close_prog; + skel->links.fentry_eth_type_trans = link; - perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); - if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) + link = bpf_program__attach_trace(skel->progs.fexit_eth_type_trans); + if (!ASSERT_OK_PTR(link, "attach fexit")) goto close_prog; + skel->links.fexit_eth_type_trans = link; /* set up perf buffer */ - pb_opts.sample_cb = on_sample; - pb_opts.ctx = &passed; - pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, + on_sample, NULL, &passed, NULL); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; @@ -135,7 +116,7 @@ void serial_test_kfree_skb(void) */ ASSERT_TRUE(passed, "passed"); - err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); + err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.bss), &zero, test_ok); if (CHECK(err, "get_result", "failed to get output data: %d\n", err)) goto close_prog; @@ -143,9 +124,6 @@ void serial_test_kfree_skb(void) CHECK_FAIL(!test_ok[0] || !test_ok[1]); close_prog: perf_buffer__free(pb); - bpf_link__destroy(link); - bpf_link__destroy(link_fentry); - bpf_link__destroy(link_fexit); bpf_object__close(obj); - bpf_object__close(obj2); + kfree_skb__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 5c9c0176991b..7d7445ccc141 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -4,6 +4,7 @@ #include <network_helpers.h> #include "kfunc_call_test.lskel.h" #include 
"kfunc_call_test_subprog.skel.h" +#include "kfunc_call_test_subprog.lskel.h" static void test_main(void) { @@ -49,6 +50,26 @@ static void test_subprog(void) kfunc_call_test_subprog__destroy(skel); } +static void test_subprog_lskel(void) +{ + struct kfunc_call_test_subprog_lskel *skel; + int prog_fd, retval, err; + + skel = kfunc_call_test_subprog_lskel__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + return; + + prog_fd = skel->progs.kfunc_call_test1.prog_fd; + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, (__u32 *)&retval, NULL); + ASSERT_OK(err, "bpf_prog_test_run(test1)"); + ASSERT_EQ(retval, 10, "test1-retval"); + ASSERT_NEQ(skel->data->active_res, -1, "active_res"); + ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res"); + + kfunc_call_test_subprog_lskel__destroy(skel); +} + void test_kfunc_call(void) { if (test__start_subtest("main")) @@ -56,4 +77,7 @@ void test_kfunc_call(void) if (test__start_subtest("subprog")) test_subprog(); + + if (test__start_subtest("subprog_lskel")) + test_subprog_lskel(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index 79f6bd1e50d6..f6933b06daf8 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -8,6 +8,7 @@ #include "test_ksyms_btf_null_check.skel.h" #include "test_ksyms_weak.skel.h" #include "test_ksyms_weak.lskel.h" +#include "test_ksyms_btf_write_check.skel.h" static int duration; @@ -137,6 +138,16 @@ cleanup: test_ksyms_weak_lskel__destroy(skel); } +static void test_write_check(void) +{ + struct test_ksyms_btf_write_check *skel; + + skel = test_ksyms_btf_write_check__open_and_load(); + ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n"); + + test_ksyms_btf_write_check__destroy(skel); +} + void test_ksyms_btf(void) { int percpu_datasec; @@ -167,4 +178,7 @@ void test_ksyms_btf(void) if 
(test__start_subtest("weak_ksyms_lskel")) test_weak_syms_lskel(); + + if (test__start_subtest("write_check")) + test_write_check(); } diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 8073105548ff..540ef28fabff 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -30,7 +30,7 @@ static void test_l4lb(const char *file) char buf[128]; u32 *magic = (u32 *)buf; - err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/legacy_printk.c b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c new file mode 100644 index 000000000000..ec6e45f2a644 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "test_legacy_printk.skel.h" + +static int execute_one_variant(bool legacy) +{ + struct test_legacy_printk *skel; + int err, zero = 0, my_pid = getpid(), res, map_fd; + + skel = test_legacy_printk__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return -errno; + + bpf_program__set_autoload(skel->progs.handle_legacy, legacy); + bpf_program__set_autoload(skel->progs.handle_modern, !legacy); + + err = test_legacy_printk__load(skel); + /* no ASSERT_OK, we expect one of two variants can fail here */ + if (err) + goto err_out; + + if (legacy) { + map_fd = bpf_map__fd(skel->maps.my_pid_map); + err = bpf_map_update_elem(map_fd, &zero, &my_pid, BPF_ANY); + if (!ASSERT_OK(err, "my_pid_map_update")) + goto err_out; + err = bpf_map_lookup_elem(map_fd, &zero, &res); + } else { + skel->bss->my_pid_var = my_pid; + } + + err = test_legacy_printk__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto err_out; + + usleep(1); /* trigger */ + + if 
(legacy) { + map_fd = bpf_map__fd(skel->maps.res_map); + err = bpf_map_lookup_elem(map_fd, &zero, &res); + if (!ASSERT_OK(err, "res_map_lookup")) + goto err_out; + } else { + res = skel->bss->res_var; + } + + if (!ASSERT_GT(res, 0, "res")) { + err = -EINVAL; + goto err_out; + } + +err_out: + test_legacy_printk__destroy(skel); + return err; +} + +void test_legacy_printk(void) +{ + /* legacy variant should work everywhere */ + ASSERT_OK(execute_one_variant(true /* legacy */), "legacy_case"); + + /* execute modern variant, can fail the load on old kernels */ + execute_one_variant(false); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c new file mode 100644 index 000000000000..9f766ddd946a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <bpf/btf.h> + +void test_libbpf_probe_prog_types(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_prog_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_prog_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + if (!ASSERT_OK_PTR(t, "bpf_prog_type_enum")) + goto cleanup; + + for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) { + const char *prog_type_name = btf__str_by_offset(btf, e->name_off); + enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val; + int res; + + if (prog_type == BPF_PROG_TYPE_UNSPEC) + continue; + + if (!test__start_subtest(prog_type_name)) + continue; + + res = libbpf_probe_bpf_prog_type(prog_type, NULL); + ASSERT_EQ(res, 1, prog_type_name); + } + +cleanup: + btf__free(btf); +} + +void 
test_libbpf_probe_map_types(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_map_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_map_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + if (!ASSERT_OK_PTR(t, "bpf_map_type_enum")) + goto cleanup; + + for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) { + const char *map_type_name = btf__str_by_offset(btf, e->name_off); + enum bpf_map_type map_type = (enum bpf_map_type)e->val; + int res; + + if (map_type == BPF_MAP_TYPE_UNSPEC) + continue; + + if (!test__start_subtest(map_type_name)) + continue; + + res = libbpf_probe_bpf_map_type(map_type, NULL); + ASSERT_EQ(res, 1, map_type_name); + } + +cleanup: + btf__free(btf); +} + +void test_libbpf_probe_helpers(void) +{ +#define CASE(prog, helper, supp) { \ + .prog_type_name = "BPF_PROG_TYPE_" # prog, \ + .helper_name = "bpf_" # helper, \ + .prog_type = BPF_PROG_TYPE_ ## prog, \ + .helper_id = BPF_FUNC_ ## helper, \ + .supported = supp, \ +} + const struct case_def { + const char *prog_type_name; + const char *helper_name; + enum bpf_prog_type prog_type; + enum bpf_func_id helper_id; + bool supported; + } cases[] = { + CASE(KPROBE, unspec, false), + CASE(KPROBE, map_lookup_elem, true), + CASE(KPROBE, loop, true), + + CASE(KPROBE, ktime_get_coarse_ns, false), + CASE(SOCKET_FILTER, ktime_get_coarse_ns, true), + + CASE(KPROBE, sys_bpf, false), + CASE(SYSCALL, sys_bpf, true), + }; + size_t case_cnt = ARRAY_SIZE(cases), i; + char buf[128]; + + for (i = 0; i < case_cnt; i++) { + const struct case_def *d = &cases[i]; + int res; + + snprintf(buf, sizeof(buf), "%s+%s", d->prog_type_name, d->helper_name); + + if (!test__start_subtest(buf)) + continue; + + res = libbpf_probe_bpf_helper(d->prog_type, d->helper_id, 
NULL); + ASSERT_EQ(res, d->supported, buf); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c index 5a2a689dbb68..4e0b2ec057aa 100644 --- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c +++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c @@ -27,7 +27,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; - err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, + err = bpf_prog_test_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK_FAIL(err)) goto close_server_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c new file mode 100644 index 000000000000..e469b023962b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <test_progs.h> +#include <bpf/btf.h> + +#include "test_log_buf.skel.h" + +static size_t libbpf_log_pos; +static char libbpf_log_buf[1024 * 1024]; +static bool libbpf_log_error; + +static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, va_list args) +{ + int emitted_cnt; + size_t left_cnt; + + left_cnt = sizeof(libbpf_log_buf) - libbpf_log_pos; + emitted_cnt = vsnprintf(libbpf_log_buf + libbpf_log_pos, left_cnt, fmt, args); + + if (emitted_cnt < 0 || emitted_cnt + 1 > left_cnt) { + libbpf_log_error = true; + return 0; + } + + libbpf_log_pos += emitted_cnt; + return 0; +} + +static void obj_load_log_buf(void) +{ + libbpf_print_fn_t old_print_cb = libbpf_set_print(libbpf_print_cb); + LIBBPF_OPTS(bpf_object_open_opts, opts); + const size_t log_buf_sz = 1024 * 1024; + struct test_log_buf* skel; + char *obj_log_buf, *good_log_buf, *bad_log_buf; + int err; + + obj_log_buf = malloc(3 * log_buf_sz); + if (!ASSERT_OK_PTR(obj_log_buf, "obj_log_buf")) + 
return; + + good_log_buf = obj_log_buf + log_buf_sz; + bad_log_buf = obj_log_buf + 2 * log_buf_sz; + obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0'; + + opts.kernel_log_buf = obj_log_buf; + opts.kernel_log_size = log_buf_sz; + opts.kernel_log_level = 4; /* for BTF this will turn into 1 */ + + /* In the first round every prog has its own log_buf, so libbpf logs + * don't have program failure logs + */ + skel = test_log_buf__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + /* set very verbose level for good_prog so we always get detailed logs */ + bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz); + bpf_program__set_log_level(skel->progs.good_prog, 2); + + bpf_program__set_log_buf(skel->progs.bad_prog, bad_log_buf, log_buf_sz); + /* log_level 0 with custom log_buf means that verbose logs are not + * requested if program load is successful, but libbpf should retry + * with log_level 1 on error and put program's verbose load log into + * custom log_buf + */ + bpf_program__set_log_level(skel->progs.bad_prog, 0); + + err = test_log_buf__load(skel); + if (!ASSERT_ERR(err, "unexpected_load_success")) + goto cleanup; + + ASSERT_FALSE(libbpf_log_error, "libbpf_log_error"); + + /* there should be no prog loading log because we specified per-prog log buf */ + ASSERT_NULL(strstr(libbpf_log_buf, "-- BEGIN PROG LOAD LOG --"), "unexp_libbpf_log"); + ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), + "libbpf_log_not_empty"); + ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); + ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"), + "good_log_verbose"); + ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), + "bad_log_not_empty"); + + if (env.verbosity > VERBOSE_NONE) { + printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf); + printf("OBJ LOG: 
\n=================\n%s=================\n", obj_log_buf); + printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf); + printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf); + } + + /* reset everything */ + test_log_buf__destroy(skel); + obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0'; + libbpf_log_buf[0] = '\0'; + libbpf_log_pos = 0; + libbpf_log_error = false; + + /* In the second round we let bad_prog's failure be logged through print callback */ + opts.kernel_log_buf = NULL; /* let everything through into print callback */ + opts.kernel_log_size = 0; + opts.kernel_log_level = 1; + + skel = test_log_buf__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + /* set normal verbose level for good_prog to check log_level is taken into account */ + bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz); + bpf_program__set_log_level(skel->progs.good_prog, 1); + + err = test_log_buf__load(skel); + if (!ASSERT_ERR(err, "unexpected_load_success")) + goto cleanup; + + ASSERT_FALSE(libbpf_log_error, "libbpf_log_error"); + + /* this time prog loading error should be logged through print callback */ + ASSERT_OK_PTR(strstr(libbpf_log_buf, "libbpf: prog 'bad_prog': -- BEGIN PROG LOAD LOG --"), + "libbpf_log_correct"); + ASSERT_STREQ(obj_log_buf, "", "obj_log__empty"); + ASSERT_STREQ(good_log_buf, "processed 4 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n", + "good_log_ok"); + ASSERT_STREQ(bad_log_buf, "", "bad_log_empty"); + + if (env.verbosity > VERBOSE_NONE) { + printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf); + printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf); + printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf); + printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf); + } + +cleanup: + free(obj_log_buf); + 
test_log_buf__destroy(skel); + libbpf_set_print(old_print_cb); +} + +static void bpf_prog_load_log_buf(void) +{ + const struct bpf_insn good_prog_insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + const size_t good_prog_insn_cnt = sizeof(good_prog_insns) / sizeof(struct bpf_insn); + const struct bpf_insn bad_prog_insns[] = { + BPF_EXIT_INSN(), + }; + size_t bad_prog_insn_cnt = sizeof(bad_prog_insns) / sizeof(struct bpf_insn); + LIBBPF_OPTS(bpf_prog_load_opts, opts); + const size_t log_buf_sz = 1024 * 1024; + char *log_buf; + int fd = -1; + + log_buf = malloc(log_buf_sz); + if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) + return; + opts.log_buf = log_buf; + opts.log_size = log_buf_sz; + + /* with log_level == 0 log_buf shoud stay empty for good prog */ + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", + good_prog_insns, good_prog_insn_cnt, &opts); + ASSERT_STREQ(log_buf, "", "good_log_0"); + ASSERT_GE(fd, 0, "good_fd1"); + if (fd >= 0) + close(fd); + fd = -1; + + /* log_level == 2 should always fill log_buf, even for good prog */ + log_buf[0] = '\0'; + opts.log_level = 2; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", + good_prog_insns, good_prog_insn_cnt, &opts); + ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(id=0,off=0,imm=0) R10=fp0"), "good_log_2"); + ASSERT_GE(fd, 0, "good_fd2"); + if (fd >= 0) + close(fd); + fd = -1; + + /* log_level == 0 should fill log_buf for bad prog */ + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "bad_prog", "GPL", + bad_prog_insns, bad_prog_insn_cnt, &opts); + ASSERT_OK_PTR(strstr(log_buf, "R0 !read_ok"), "bad_log_0"); + ASSERT_LT(fd, 0, "bad_fd"); + if (fd >= 0) + close(fd); + fd = -1; + + free(log_buf); +} + +static void bpf_btf_load_log_buf(void) +{ + LIBBPF_OPTS(bpf_btf_load_opts, opts); + const size_t log_buf_sz = 1024 * 1024; + const void *raw_btf_data; + __u32 raw_btf_size; + struct btf 
*btf; + char *log_buf; + int fd = -1; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good")) + goto cleanup; + + log_buf = malloc(log_buf_sz); + if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) + goto cleanup; + opts.log_buf = log_buf; + opts.log_size = log_buf_sz; + + /* with log_level == 0 log_buf shoud stay empty for good BTF */ + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts); + ASSERT_STREQ(log_buf, "", "good_log_0"); + ASSERT_GE(fd, 0, "good_fd1"); + if (fd >= 0) + close(fd); + fd = -1; + + /* log_level == 2 should always fill log_buf, even for good BTF */ + log_buf[0] = '\0'; + opts.log_level = 2; + fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts); + printf("LOG_BUF: %s\n", log_buf); + ASSERT_OK_PTR(strstr(log_buf, "magic: 0xeb9f"), "good_log_2"); + ASSERT_GE(fd, 0, "good_fd2"); + if (fd >= 0) + close(fd); + fd = -1; + + /* make BTF bad, add pointer pointing to non-existing type */ + ASSERT_GT(btf__add_ptr(btf, 100), 0, "bad_ptr_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_bad")) + goto cleanup; + + /* log_level == 0 should fill log_buf for bad BTF */ + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts); + printf("LOG_BUF: %s\n", log_buf); + ASSERT_OK_PTR(strstr(log_buf, "[2] PTR (anon) type_id=100 Invalid type_id"), "bad_log_0"); + ASSERT_LT(fd, 0, "bad_fd"); + if (fd >= 0) + close(fd); + fd = -1; + +cleanup: + free(log_buf); + btf__free(btf); +} + +void test_log_buf(void) +{ + if (test__start_subtest("obj_load_log_buf")) + obj_load_log_buf(); + if (test__start_subtest("bpf_prog_load_log_buf")) + bpf_prog_load_log_buf(); + if (test__start_subtest("bpf_btf_load_log_buf")) + bpf_btf_load_log_buf(); +} diff 
--git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index ce17b1ed8709..23d19e9cf26a 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -53,9 +53,9 @@ void test_map_lock(void) int err = 0, key = 0, i; void *ret; - err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK_FAIL(err)) { - printf("test_map_lock:bpf_prog_load errno %d\n", errno); + printf("test_map_lock:bpf_prog_test_load errno %d\n", errno); goto close_prog; } map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c index 4972f92205c7..273725504f11 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -4,31 +4,29 @@ #include <test_progs.h> #include <network_helpers.h> -#include "map_ptr_kern.skel.h" +#include "map_ptr_kern.lskel.h" void test_map_ptr(void) { - struct map_ptr_kern *skel; + struct map_ptr_kern_lskel *skel; __u32 duration = 0, retval; char buf[128]; int err; int page_size = getpagesize(); - skel = map_ptr_kern__open(); + skel = map_ptr_kern_lskel__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; - err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size); - if (!ASSERT_OK(err, "bpf_map__set_max_entries")) - goto cleanup; + skel->maps.m_ringbuf.max_entries = page_size; - err = map_ptr_kern__load(skel); + err = map_ptr_kern_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) goto cleanup; skel->bss->page_size = page_size; - err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, + err = bpf_prog_test_run(skel->progs.cg_skb.prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, NULL, &retval, NULL); if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) @@ -39,5 +37,5 @@ void 
test_map_ptr(void) goto cleanup; cleanup: - map_ptr_kern__destroy(skel); + map_ptr_kern_lskel__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c index 7589c03fd26b..eb2feaac81fe 100644 --- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -204,8 +204,8 @@ static int pass_ack(struct migrate_reuseport_test_case *test_case) { int err; - err = bpf_link__detach(test_case->link); - if (!ASSERT_OK(err, "bpf_link__detach")) + err = bpf_link__destroy(test_case->link); + if (!ASSERT_OK(err, "bpf_link__destroy")) return -1; test_case->link = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 4e32f3586a75..5fc2b3a0711e 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -47,7 +47,6 @@ void serial_test_perf_buffer(void) { int err, on_len, nr_on_cpus = 0, nr_cpus, i, j; int zero = 0, my_pid = getpid(); - struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; cpu_set_t cpu_seen; struct perf_buffer *pb; @@ -82,9 +81,8 @@ void serial_test_perf_buffer(void) goto out_close; /* set up perf buffer */ - pb_opts.sample_cb = on_sample; - pb_opts.ctx = &cpu_seen; - pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, + on_sample, NULL, &cpu_seen, NULL); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out_close; diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index d4b953ae3407..31c09ba577eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -241,8 +241,8 @@ void test_pinning(void) goto out; } - map_fd = 
bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32), - sizeof(__u64), 1, 0); + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(__u32), + sizeof(__u64), 1, NULL); if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c index 44b514fabccd..6628710ec3c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c @@ -9,7 +9,7 @@ void test_pkt_access(void) __u32 duration, retval; int err, prog_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c index 939015cd6dba..c9d2d6a1bfcc 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c @@ -9,7 +9,7 @@ void test_pkt_md_access(void) __u32 duration, retval; int err, prog_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/prog_array_init.c b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c new file mode 100644 index 000000000000..fc4657619739 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include <test_progs.h> +#include "test_prog_array_init.skel.h" + +void test_prog_array_init(void) +{ + struct test_prog_array_init *skel; + int err; + + skel = test_prog_array_init__open(); + if (!ASSERT_OK_PTR(skel, "could not open BPF object")) + return; + + skel->rodata->my_pid = getpid(); + + err = 
test_prog_array_init__load(skel); + if (!ASSERT_OK(err, "could not load BPF object")) + goto cleanup; + + skel->links.entry = bpf_program__attach_raw_tracepoint(skel->progs.entry, "sys_enter"); + if (!ASSERT_OK_PTR(skel->links.entry, "could not attach BPF program")) + goto cleanup; + + usleep(1); + + ASSERT_EQ(skel->bss->value, 42, "unexpected value"); + +cleanup: + test_prog_array_init__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index f47e7b1cb32c..b9822f914eeb 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -14,7 +14,7 @@ static void test_queue_stack_map_by_type(int type) int i, err, prog_fd, map_in_fd, map_out_fd; char file[32], buf[128]; struct bpf_object *obj; - struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + struct iphdr iph; /* Fill test values to be used */ for (i = 0; i < MAP_SIZE; i++) @@ -27,7 +27,7 @@ static void test_queue_stack_map_by_type(int type) else return; - err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -60,15 +60,17 @@ static void test_queue_stack_map_by_type(int type) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - if (err || retval || size != sizeof(pkt_v4) || - iph->daddr != val) + if (err || retval || size != sizeof(pkt_v4)) + break; + memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); + if (iph.daddr != val) break; } - CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, + CHECK(err || retval || size != sizeof(pkt_v4) || iph.daddr != val, "bpf_map_pop_elem", "err %d errno %d retval %d size %d iph->daddr %u\n", - err, errno, retval, size, iph->daddr); + err, errno, retval, size, iph.daddr); /* Queue is empty, program should return TC_ACT_SHOT */ 
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c index 9807336a3016..e2f1445b0e10 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c @@ -18,15 +18,15 @@ void test_raw_tp_writable_reject_nbd_invalid(void) BPF_EXIT_INSN(), }; - struct bpf_load_program_attr load_attr = { - .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, - .license = "GPL v2", - .insns = program, - .insns_cnt = sizeof(program) / sizeof(struct bpf_insn), + LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_level = 2, - }; + .log_buf = error, + .log_size = sizeof(error), + ); - bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2", + program, sizeof(program) / sizeof(struct bpf_insn), + &opts); if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load", "failed: %d errno %d\n", bpf_fd, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c index ddefa1192e5d..239baccabccb 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c @@ -17,15 +17,15 @@ void serial_test_raw_tp_writable_test_run(void) BPF_EXIT_INSN(), }; - struct bpf_load_program_attr load_attr = { - .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, - .license = "GPL v2", - .insns = trace_program, - .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .log_level = 2, - }; + .log_buf = error, + .log_size = sizeof(error), + ); - int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + int bpf_fd = 
bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2", + trace_program, sizeof(trace_program) / sizeof(struct bpf_insn), + &trace_opts); if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded", "failed: %d errno %d\n", bpf_fd, errno)) return; @@ -35,15 +35,14 @@ void serial_test_raw_tp_writable_test_run(void) BPF_EXIT_INSN(), }; - struct bpf_load_program_attr skb_load_attr = { - .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .license = "GPL v2", - .insns = skb_program, - .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), - }; + LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, + .log_buf = error, + .log_size = sizeof(error), + ); - int filter_fd = - bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + int filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL v2", + skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), + &skb_opts); if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", filter_fd, errno)) goto out_bpffd; diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 167cd8a2edfd..e945195b24c9 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -62,8 +62,8 @@ void test_ringbuf_multi(void) if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) goto cleanup; - proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0); - if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n")) + proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); + if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 3cfc910ab3c1..1cbd8cd64044 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -18,7 +18,6 @@ #include <netinet/in.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> -#include "bpf_rlimit.h" #include "bpf_util.h" #include "test_progs.h" @@ -66,29 +65,20 @@ static union sa46 { static int create_maps(enum bpf_map_type inner_type) { - struct bpf_create_map_attr attr = {}; + LIBBPF_OPTS(bpf_map_create_opts, opts); inner_map_type = inner_type; /* Creating reuseport_array */ - attr.name = "reuseport_array"; - attr.map_type = inner_type; - attr.key_size = sizeof(__u32); - attr.value_size = sizeof(__u32); - attr.max_entries = REUSEPORT_ARRAY_SIZE; - - reuseport_array = bpf_create_map_xattr(&attr); + reuseport_array = bpf_map_create(inner_type, "reuseport_array", + sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL); RET_ERR(reuseport_array < 0, "creating reuseport_array", "reuseport_array:%d errno:%d\n", reuseport_array, errno); /* Creating outer_map */ - attr.name = "outer_map"; - attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS; - attr.key_size = sizeof(__u32); - attr.value_size = sizeof(__u32); - attr.max_entries = 1; - attr.inner_map_fd = reuseport_array; - outer_map = bpf_create_map_xattr(&attr); + opts.inner_map_fd = reuseport_array; + outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map", + sizeof(__u32), sizeof(__u32), 1, &opts); RET_ERR(outer_map < 0, "creating outer_map", "outer_map:%d errno:%d\n", outer_map, errno); diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c index fdfdcff6cbef..aecfe662c070 100644 --- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c +++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c @@ -22,7 +22,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type) prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN(); - prog_fd = 
bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), + prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); CHECK(prog_fd < 0, "test-run", "errno %d\n", errno); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 6db07401bc49..597d0467a926 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -30,7 +30,6 @@ #include <bpf/bpf.h> #include "test_progs.h" -#include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" #include "network_helpers.h" @@ -937,6 +936,37 @@ static void test_drop_on_lookup(struct test_sk_lookup *skel) .connect_to = { EXT_IP6, EXT_PORT }, .listen_at = { EXT_IP6, INT_PORT }, }, + /* The program will drop on success, meaning that the ifindex + * was 1. + */ + { + .desc = "TCP IPv4 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "TCP IPv6 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_STREAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, EXT_PORT }, + }, + { + .desc = "UDP IPv4 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP4, EXT_PORT }, + .listen_at = { EXT_IP4, EXT_PORT }, + }, + { + .desc = "UDP IPv6 drop on valid ifindex", + .lookup_prog = skel->progs.check_ifindex, + .sotype = SOCK_DGRAM, + .connect_to = { EXT_IP6, EXT_PORT }, + .listen_at = { EXT_IP6, EXT_PORT }, + }, }; const struct test *t; diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index c437e6ba8fe2..b5319ba2ee27 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -32,7 +32,7 @@ void test_skb_ctx(void) int 
err; int i; - err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &tattr.prog_fd); if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) return; @@ -111,4 +111,6 @@ void test_skb_ctx(void) "ctx_out_mark", "skb->mark == %u, expected %d\n", skb.mark, 10); + + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c index f302ad84a298..6f802a1c0800 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c @@ -20,7 +20,7 @@ void test_skb_helpers(void) struct bpf_object *obj; int err; - err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &tattr.prog_fd); if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index fae40db4d81f..9fc040eaa482 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -15,7 +15,6 @@ #include "network_helpers.h" #include "cgroup_helpers.h" #include "test_progs.h" -#include "bpf_rlimit.h" #include "test_sock_fields.skel.h" enum bpf_linum_array_idx { diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 1352ec104149..85db0f4cdd95 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -91,9 +91,9 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) if (CHECK_FAIL(s < 0)) return; - map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); + map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, 
NULL); if (CHECK_FAIL(map < 0)) { - perror("bpf_create_map"); + perror("bpf_cmap_create"); goto out; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 7a0d64fdc192..af293ea1542c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -97,7 +97,7 @@ static void run_tests(int family, enum bpf_map_type map_type) char test_name[MAX_TEST_NAME]; int map; - map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); + map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); if (CHECK_FAIL(map < 0)) { perror("bpf_map_create"); return; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 2a9cb951bfd6..7e21bfab6358 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -502,8 +502,8 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd) if (s < 0) return; - mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key), - sizeof(value32), 1, 0); + mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key), + sizeof(value32), 1, NULL); if (mapfd < 0) { FAIL_ERRNO("map_create"); goto close; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 3e8517a8395a..cd09f4c7dd92 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -852,22 +852,21 @@ static struct sockopt_test { static int load_prog(const struct bpf_insn *insns, enum bpf_attach_type expected_attach_type) { - struct bpf_load_program_attr attr = { - .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT, + LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = expected_attach_type, - .insns = insns, - .license = "GPL", .log_level = 2, - }; - int fd; + 
.log_buf = bpf_log_buf, + .log_size = sizeof(bpf_log_buf), + ); + int fd, insns_cnt = 0; for (; - insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT); - attr.insns_cnt++) { + insns[insns_cnt].code != (BPF_JMP | BPF_EXIT); + insns_cnt++) { } - attr.insns_cnt++; + insns_cnt++; - fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf)); + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts); if (verbose && fd < 0) fprintf(stderr, "%s\n", bpf_log_buf); diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 86f97681ad89..8ed78a9383ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -136,7 +136,8 @@ static int start_server(void) return fd; } -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, + const char *prog_name) { enum bpf_attach_type attach_type; enum bpf_prog_type prog_type; @@ -145,20 +146,20 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); if (err) { - log_err("Failed to deduct types for %s BPF program", title); + log_err("Failed to deduct types for %s BPF program", prog_name); return -1; } - prog = bpf_object__find_program_by_title(obj, title); + prog = bpf_object__find_program_by_name(obj, prog_name); if (!prog) { - log_err("Failed to find %s BPF program", title); + log_err("Failed to find %s BPF program", prog_name); return -1; } err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, attach_type, 0); if (err) { - log_err("Failed to attach %s BPF program", title); + log_err("Failed to attach %s BPF program", prog_name); return -1; } @@ -167,25 +168,25 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) static 
void run_test(int cgroup_fd) { - struct bpf_prog_load_attr attr = { - .file = "./sockopt_inherit.o", - }; int server_fd = -1, client_fd; struct bpf_object *obj; void *server_err; pthread_t tid; - int ignored; int err; - err = bpf_prog_load_xattr(&attr, &obj, &ignored); - if (CHECK_FAIL(err)) + obj = bpf_object__open_file("sockopt_inherit.o", NULL); + if (!ASSERT_OK_PTR(obj, "obj_open")) return; - err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "obj_load")) + goto close_bpf_object; + + err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt"); if (CHECK_FAIL(err)) goto close_bpf_object; - err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); + err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt"); if (CHECK_FAIL(err)) goto close_bpf_object; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index bc34f7773444..abce12ddcc37 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -297,14 +297,10 @@ detach: void test_sockopt_multi(void) { - struct bpf_prog_load_attr attr = { - .file = "./sockopt_multi.o", - }; int cg_parent = -1, cg_child = -1; struct bpf_object *obj = NULL; int sock_fd = -1; int err = -1; - int ignored; cg_parent = test__join_cgroup("/parent"); if (CHECK_FAIL(cg_parent < 0)) @@ -314,8 +310,12 @@ void test_sockopt_multi(void) if (CHECK_FAIL(cg_child < 0)) goto out; - err = bpf_prog_load_xattr(&attr, &obj, &ignored); - if (CHECK_FAIL(err)) + obj = bpf_object__open_file("sockopt_multi.o", NULL); + if (!ASSERT_OK_PTR(obj, "obj_load")) + goto out; + + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "obj_load")) goto out; sock_fd = socket(AF_INET, SOCK_STREAM, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 7577a77a4c4c..6307f5d2b417 100644 
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -24,9 +24,9 @@ void test_spinlock(void) int err = 0, i; void *ret; - err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); if (CHECK_FAIL(err)) { - printf("test_spin_lock:bpf_prog_load errno %d\n", errno); + printf("test_spin_lock:bpf_prog_test_load errno %d\n", errno); goto close_prog; } for (i = 0; i < 4; i++) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index 04b476bd62b9..313f0a66232e 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -4,7 +4,7 @@ void test_stacktrace_map(void) { int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "tracepoint/sched/sched_switch"; + const char *prog_name = "oncpu"; int err, prog_fd, stack_trace_len; const char *file = "./test_stacktrace_map.o"; __u32 key, val, duration = 0; @@ -12,11 +12,11 @@ void test_stacktrace_map(void) struct bpf_object *obj; struct bpf_link *link; - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) return; - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index 4fd30bb651ad..1cb8dd36bd8f 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -3,7 +3,7 @@ void 
test_stacktrace_map_raw_tp(void) { - const char *prog_name = "tracepoint/sched/sched_switch"; + const char *prog_name = "oncpu"; int control_map_fd, stackid_hmap_fd, stackmap_fd; const char *file = "./test_stacktrace_map.o"; __u32 key, val, duration = 0; @@ -12,11 +12,11 @@ void test_stacktrace_map_raw_tp(void) struct bpf_object *obj; struct bpf_link *link = NULL; - err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - prog = bpf_object__find_program_by_title(obj, prog_name); + prog = bpf_object__find_program_by_name(obj, prog_name); if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) goto close_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 9825f1f7bfcc..5dc0f425bd11 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -16,7 +16,7 @@ static void test_tailcall_1(void) char prog_name[32]; char buff[128] = {}; - err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -154,7 +154,7 @@ static void test_tailcall_2(void) char prog_name[32]; char buff[128] = {}; - err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -228,7 +228,7 @@ static void test_tailcall_count(const char *which) __u32 retval, duration; char buff[128] = {}; - err = bpf_prog_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -324,7 +324,7 @@ static void test_tailcall_4(void) char buff[128] = {}; char prog_name[32]; 
- err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -412,7 +412,7 @@ static void test_tailcall_5(void) char buff[128] = {}; char prog_name[32]; - err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -498,7 +498,7 @@ static void test_tailcall_bpf2bpf_1(void) __u32 retval, duration; char prog_name[32]; - err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, + err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -582,7 +582,7 @@ static void test_tailcall_bpf2bpf_2(void) __u32 retval, duration; char buff[128] = {}; - err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, + err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -660,7 +660,7 @@ static void test_tailcall_bpf2bpf_3(void) __u32 retval, duration; char prog_name[32]; - err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, + err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -757,7 +757,7 @@ static void test_tailcall_bpf2bpf_4(bool noise) __u32 retval, duration; char prog_name[32]; - err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, + err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c index 1bdc1d86a50c..17947c9e1d66 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c @@ -11,7 +11,7 @@ void 
test_task_fd_query_rawtp(void) __u32 duration = 0; char buf[256]; - err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c index 3f131b8fe328..c2a98a7a8dfc 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c @@ -13,8 +13,8 @@ static void test_task_fd_query_tp_core(const char *probe_name, __u32 duration = 0; char buf[256]; - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno)) goto close_prog; snprintf(buf, sizeof(buf), diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 4b18b73df10b..c2426df58e17 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd) if (!ASSERT_OK(err, "unshare")) return err; + /* Make our /sys mount private, so the following umount won't + * trigger the global umount in case it's shared. 
+ */ + err = mount("none", "/sys", NULL, MS_PRIVATE, NULL); + if (!ASSERT_OK(err, "remount private /sys")) + return err; + err = umount2("/sys", MNT_DETACH); if (!ASSERT_OK(err, "umount2 /sys")) return err; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c index 594307dffd13..11bf755be4c9 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c @@ -8,7 +8,7 @@ void test_tcp_estats(void) struct bpf_object *obj; __u32 duration = 0; - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); CHECK(err, "", "err %d errno %d\n", err, errno); if (err) return; diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 265b4fe33ec3..96ff2c20af81 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -2,6 +2,7 @@ #include <test_progs.h> #include "cgroup_helpers.h" #include "network_helpers.h" +#include "tcp_rtt.skel.h" struct tcp_rtt_storage { __u32 invoked; @@ -91,26 +92,18 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, static int run_test(int cgroup_fd, int server_fd) { - struct bpf_prog_load_attr attr = { - .prog_type = BPF_PROG_TYPE_SOCK_OPS, - .file = "./tcp_rtt.o", - .expected_attach_type = BPF_CGROUP_SOCK_OPS, - }; - struct bpf_object *obj; - struct bpf_map *map; + struct tcp_rtt *skel; int client_fd; int prog_fd; int map_fd; int err; - err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); - if (err) { - log_err("Failed to load BPF object"); + skel = tcp_rtt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load")) return -1; - } - map = bpf_object__next_map(obj, NULL); - map_fd = bpf_map__fd(map); + map_fd = bpf_map__fd(skel->maps.socket_storage_map); + prog_fd = 
bpf_program__fd(skel->progs._sockops); err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); if (err) { @@ -149,7 +142,7 @@ close_client_fd: close(client_fd); close_bpf_object: - bpf_object__close(obj); + tcp_rtt__destroy(skel); return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c index d29ebfeef9c5..214d9f4a94a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -19,11 +19,13 @@ static int read_iter(char *file) fd = open(file, 0); if (fd < 0) return -1; - while ((len = read(fd, buf, sizeof(buf))) > 0) + while ((len = read(fd, buf, sizeof(buf))) > 0) { + buf[sizeof(buf) - 1] = '\0'; if (strstr(buf, "iter")) { close(fd); return 0; } + } close(fd); return -1; } @@ -80,7 +82,7 @@ static int fn(void) if (!ASSERT_OK(err, "creating " TDIR "/fs1/b")) goto out; - map = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); if (!ASSERT_GT(map, 0, "create_map(ARRAY)")) goto out; err = bpf_obj_pin(map, TDIR "/fs1/c"); diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 7e13129f593a..509e21d5cb9d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -30,17 +30,29 @@ extern int extra_prog_load_log_flags; static int check_load(const char *file) { - struct bpf_prog_load_attr attr; struct bpf_object *obj = NULL; - int err, prog_fd; + struct bpf_program *prog; + int err; - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = BPF_PROG_TYPE_UNSPEC; - attr.log_level = extra_prog_load_log_flags; - attr.prog_flags = BPF_F_TEST_RND_HI32; found = false; - err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + + obj = bpf_object__open_file(file, NULL); + err = 
libbpf_get_error(obj); + if (err) + return err; + + prog = bpf_object__next_program(obj, NULL); + if (!prog) { + err = -ENOENT; + goto err_out; + } + + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_log_level(prog, extra_prog_load_log_flags); + + err = bpf_object__load(obj); + +err_out: bpf_object__close(obj); return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c index d2c16eaae367..26ac26a88026 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -28,10 +28,6 @@ static unsigned int duration; struct storage { void *inode; unsigned int value; - /* Lock ensures that spin locked versions of local stoage operations - * also work, most operations in this tests are still single threaded - */ - struct bpf_spin_lock lock; }; /* Fork and exec the provided rm binary and return the exit code of the @@ -66,27 +62,24 @@ static int run_self_unlink(int *monitored_pid, const char *rm_path) static bool check_syscall_operations(int map_fd, int obj_fd) { - struct storage val = { .value = TEST_STORAGE_VALUE, .lock = { 0 } }, - lookup_val = { .value = 0, .lock = { 0 } }; + struct storage val = { .value = TEST_STORAGE_VALUE }, + lookup_val = { .value = 0 }; int err; /* Looking up an existing element should fail initially */ - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, - BPF_F_LOCK); + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", "err:%d errno:%d\n", err, errno)) return false; /* Create a new element */ - err = bpf_map_update_elem(map_fd, &obj_fd, &val, - BPF_NOEXIST | BPF_F_LOCK); + err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST); if (CHECK(err < 0, "bpf_map_update_elem", "err:%d errno:%d\n", err, errno)) return false; /* Lookup the newly created element */ - err = 
bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, - BPF_F_LOCK); + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); if (CHECK(err < 0, "bpf_map_lookup_elem", "err:%d errno:%d", err, errno)) return false; @@ -102,8 +95,7 @@ static bool check_syscall_operations(int map_fd, int obj_fd) return false; /* The lookup should fail, now that the element has been deleted */ - err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, - BPF_F_LOCK); + err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0); if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", "err:%d errno:%d\n", err, errno)) return false; diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 123c68c1917d..05acb376f74d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -56,11 +56,11 @@ static void setaffinity(void) void test_test_overhead(void) { - const char *kprobe_name = "kprobe/__set_task_comm"; - const char *kretprobe_name = "kretprobe/__set_task_comm"; - const char *raw_tp_name = "raw_tp/task_rename"; - const char *fentry_name = "fentry/__set_task_comm"; - const char *fexit_name = "fexit/__set_task_comm"; + const char *kprobe_name = "prog1"; + const char *kretprobe_name = "prog2"; + const char *raw_tp_name = "prog3"; + const char *fentry_name = "prog4"; + const char *fexit_name = "prog5"; const char *kprobe_func = "__set_task_comm"; struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; struct bpf_program *fentry_prog, *fexit_prog; @@ -76,23 +76,23 @@ void test_test_overhead(void) if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); + kprobe_prog = bpf_object__find_program_by_name(obj, kprobe_name); if (CHECK(!kprobe_prog, "find_probe", "prog '%s' not found\n", kprobe_name)) goto cleanup; - kretprobe_prog = 
bpf_object__find_program_by_title(obj, kretprobe_name); + kretprobe_prog = bpf_object__find_program_by_name(obj, kretprobe_name); if (CHECK(!kretprobe_prog, "find_probe", "prog '%s' not found\n", kretprobe_name)) goto cleanup; - raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name); + raw_tp_prog = bpf_object__find_program_by_name(obj, raw_tp_name); if (CHECK(!raw_tp_prog, "find_probe", "prog '%s' not found\n", raw_tp_name)) goto cleanup; - fentry_prog = bpf_object__find_program_by_title(obj, fentry_name); + fentry_prog = bpf_object__find_program_by_name(obj, fentry_name); if (CHECK(!fentry_prog, "find_probe", "prog '%s' not found\n", fentry_name)) goto cleanup; - fexit_prog = bpf_object__find_program_by_title(obj, fexit_name); + fexit_prog = bpf_object__find_program_by_name(obj, fexit_name); if (CHECK(!fexit_prog, "find_probe", "prog '%s' not found\n", fexit_name)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c new file mode 100644 index 000000000000..b57a3009465f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <test_progs.h> +#include "strncmp_test.skel.h" + +static int trigger_strncmp(const struct strncmp_test *skel) +{ + int cmp; + + usleep(1); + + cmp = skel->bss->cmp_ret; + if (cmp > 0) + return 1; + if (cmp < 0) + return -1; + return 0; +} + +/* + * Compare str and target after making str[i] != target[i]. + * When exp is -1, make str[i] < target[i] and delta = -1. 
+ */ +static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name, + int exp) +{ + size_t nr = sizeof(skel->bss->str); + char *str = skel->bss->str; + int delta = exp; + int got; + size_t i; + + memcpy(str, skel->rodata->target, nr); + for (i = 0; i < nr - 1; i++) { + str[i] += delta; + + got = trigger_strncmp(skel); + ASSERT_EQ(got, exp, name); + + str[i] -= delta; + } +} + +static void test_strncmp_ret(void) +{ + struct strncmp_test *skel; + struct bpf_program *prog; + int err, got; + + skel = strncmp_test__open(); + if (!ASSERT_OK_PTR(skel, "strncmp_test open")) + return; + + bpf_object__for_each_program(prog, skel->obj) + bpf_program__set_autoload(prog, false); + + bpf_program__set_autoload(skel->progs.do_strncmp, true); + + err = strncmp_test__load(skel); + if (!ASSERT_EQ(err, 0, "strncmp_test load")) + goto out; + + err = strncmp_test__attach(skel); + if (!ASSERT_EQ(err, 0, "strncmp_test attach")) + goto out; + + skel->bss->target_pid = getpid(); + + /* Empty str */ + skel->bss->str[0] = '\0'; + got = trigger_strncmp(skel); + ASSERT_EQ(got, -1, "strncmp: empty str"); + + /* Same string */ + memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str)); + got = trigger_strncmp(skel); + ASSERT_EQ(got, 0, "strncmp: same str"); + + /* Not-null-termainted string */ + memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str)); + skel->bss->str[sizeof(skel->bss->str) - 1] = 'A'; + got = trigger_strncmp(skel); + ASSERT_EQ(got, 1, "strncmp: not-null-term str"); + + strncmp_full_str_cmp(skel, "strncmp: less than", -1); + strncmp_full_str_cmp(skel, "strncmp: greater than", 1); +out: + strncmp_test__destroy(skel); +} + +static void test_strncmp_bad_not_const_str_size(void) +{ + struct strncmp_test *skel; + struct bpf_program *prog; + int err; + + skel = strncmp_test__open(); + if (!ASSERT_OK_PTR(skel, "strncmp_test open")) + return; + + bpf_object__for_each_program(prog, skel->obj) + bpf_program__set_autoload(prog, false); + + 
bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, + true); + + err = strncmp_test__load(skel); + ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size"); + + strncmp_test__destroy(skel); +} + +static void test_strncmp_bad_writable_target(void) +{ + struct strncmp_test *skel; + struct bpf_program *prog; + int err; + + skel = strncmp_test__open(); + if (!ASSERT_OK_PTR(skel, "strncmp_test open")) + return; + + bpf_object__for_each_program(prog, skel->obj) + bpf_program__set_autoload(prog, false); + + bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, + true); + + err = strncmp_test__load(skel); + ASSERT_ERR(err, "strncmp_test load bad_writable_target"); + + strncmp_test__destroy(skel); +} + +static void test_strncmp_bad_not_null_term_target(void) +{ + struct strncmp_test *skel; + struct bpf_program *prog; + int err; + + skel = strncmp_test__open(); + if (!ASSERT_OK_PTR(skel, "strncmp_test open")) + return; + + bpf_object__for_each_program(prog, skel->obj) + bpf_program__set_autoload(prog, false); + + bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, + true); + + err = strncmp_test__load(skel); + ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target"); + + strncmp_test__destroy(skel); +} + +void test_test_strncmp(void) +{ + if (test__start_subtest("strncmp_ret")) + test_strncmp_ret(); + if (test__start_subtest("strncmp_bad_not_const_str_size")) + test_strncmp_bad_not_const_str_size(); + if (test__start_subtest("strncmp_bad_writable_target")) + test_strncmp_bad_writable_target(); + if (test__start_subtest("strncmp_bad_not_null_term_target")) + test_strncmp_bad_not_null_term_target(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c index 8652d0a46c87..39e79291c82b 100644 --- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c +++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c @@ -35,7 +35,7 @@ void 
serial_test_tp_attach_query(void) query = malloc(sizeof(*query) + sizeof(__u32) * num_progs); for (i = 0; i < num_progs; i++) { - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], + err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i], &prog_fd[i]); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) goto cleanup1; diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index fc146671b20a..9c795ee52b7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -35,7 +35,7 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name) struct bpf_program *prog; int duration = 0; - prog = bpf_object__find_program_by_title(obj, name); + prog = bpf_object__find_program_by_name(obj, name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) return ERR_PTR(-EINVAL); return bpf_program__attach_trace(prog); @@ -44,8 +44,8 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name) /* TODO: use different target function to run in concurrent mode */ void serial_test_trampoline_count(void) { - const char *fentry_name = "fentry/__set_task_comm"; - const char *fexit_name = "fexit/__set_task_comm"; + const char *fentry_name = "prog1"; + const char *fexit_name = "prog2"; const char *object = "test_trampoline_count.o"; struct inst inst[MAX_TRAMP_PROGS] = {}; int err, i = 0, duration = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index 48921ff74850..ac65456b7ab8 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -11,12 +11,12 @@ void test_xdp(void) const char *file = "./test_xdp.o"; struct bpf_object *obj; char buf[128]; - struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr); - struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + 
struct ipv6hdr iph6; + struct iphdr iph; __u32 duration, retval, size; int err, prog_fd, map_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -28,16 +28,17 @@ void test_xdp(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - + memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); CHECK(err || retval != XDP_TX || size != 74 || - iph->protocol != IPPROTO_IPIP, "ipv4", + iph.protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); + memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6)); CHECK(err || retval != XDP_TX || size != 114 || - iph6->nexthdr != IPPROTO_IPV6, "ipv6", + iph6.nexthdr != IPPROTO_IPV6, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); out: diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index f529e3c923ae..3f5a17c38be5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -10,7 +10,7 @@ static void test_xdp_adjust_tail_shrink(void) int err, prog_fd; char buf[128]; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -38,7 +38,7 @@ static void test_xdp_adjust_tail_grow(void) __u32 duration, retval, size, expect_sz; int err, prog_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; @@ -75,7 +75,7 @@ static void test_xdp_adjust_tail_grow2(void) .data_size_out = 0, /* Per test */ }; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, 
&tattr.prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index 4c4057262cd8..c6fa390e3aa1 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -16,7 +16,7 @@ void serial_test_xdp_attach(void) len = sizeof(info); - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1); if (CHECK_FAIL(err)) return; err = bpf_obj_get_info_by_fd(fd1, &info, &len); @@ -24,7 +24,7 @@ void serial_test_xdp_attach(void) goto out_1; id1 = info.id; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2); if (CHECK_FAIL(err)) goto out_1; @@ -34,7 +34,7 @@ void serial_test_xdp_attach(void) goto out_2; id2 = info.id; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3); if (CHECK_FAIL(err)) goto out_2; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index faa22b84f2ee..5e3a26b15ec6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -218,9 +218,9 @@ static int send_udp_packets(int vary_dst_ip) .h_dest = BOND2_MAC, .h_proto = htons(ETH_P_IP), }; - uint8_t buf[128] = {}; - struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh)); - struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph)); + struct iphdr iph = {}; + struct udphdr uh = {}; + uint8_t buf[128]; int i, s = -1; int ifindex; @@ -232,17 +232,16 @@ static int send_udp_packets(int vary_dst_ip) if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex")) goto err; - memcpy(buf, &eh, sizeof(eh)); 
- iph->ihl = 5; - iph->version = 4; - iph->tos = 16; - iph->id = 1; - iph->ttl = 64; - iph->protocol = IPPROTO_UDP; - iph->saddr = 1; - iph->daddr = 2; - iph->tot_len = htons(sizeof(buf) - ETH_HLEN); - iph->check = 0; + iph.ihl = 5; + iph.version = 4; + iph.tos = 16; + iph.id = 1; + iph.ttl = 64; + iph.protocol = IPPROTO_UDP; + iph.saddr = 1; + iph.daddr = 2; + iph.tot_len = htons(sizeof(buf) - ETH_HLEN); + iph.check = 0; for (i = 1; i <= NPACKETS; i++) { int n; @@ -253,10 +252,15 @@ static int send_udp_packets(int vary_dst_ip) }; /* vary the UDP destination port for even distribution with roundrobin/xor modes */ - uh->dest++; + uh.dest++; if (vary_dst_ip) - iph->daddr++; + iph.daddr++; + + /* construct a packet */ + memcpy(buf, &eh, sizeof(eh)); + memcpy(buf + sizeof(eh), &iph, sizeof(iph)); + memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh)); n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll)); if (!ASSERT_EQ(n, sizeof(buf), "sendto")) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 3bd5904b4db5..c98a897ad692 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -42,14 +42,13 @@ void test_xdp_bpf2bpf(void) char buf[128]; int err, pkt_fd, map_fd; bool passed = false; - struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + struct iphdr iph; struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; struct bpf_program *prog; struct perf_buffer *pb = NULL; - struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -86,19 +85,17 @@ void test_xdp_bpf2bpf(void) goto out; /* Set up perf buffer */ - pb_opts.sample_cb = on_sample; - pb_opts.ctx = &passed; - pb = 
perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), - 1, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, + on_sample, NULL, &passed, NULL); if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out; /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - + memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); if (CHECK(err || retval != XDP_TX || size != 74 || - iph->protocol != IPPROTO_IPIP, "ipv4", + iph.protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size)) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c index 4e2a4fd56f67..abe48e82e1dc 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c @@ -29,7 +29,7 @@ void serial_test_xdp_info(void) /* Setup prog */ - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 983ab0b47d30..b2b357f8c74c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -8,46 +8,47 @@ void serial_test_xdp_link(void) { - __u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err; DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1); struct test_xdp_link *skel1 = NULL, *skel2 = NULL; + __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2; struct bpf_link_info link_info; struct bpf_prog_info prog_info; struct bpf_link *link; + int err; __u32 link_info_len = sizeof(link_info); __u32 prog_info_len = sizeof(prog_info); skel1 = test_xdp_link__open_and_load(); - if (CHECK(!skel1, "skel_load", "skeleton open and load failed\n")) + if (!ASSERT_OK_PTR(skel1, "skel_load")) goto 
cleanup; prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler); skel2 = test_xdp_link__open_and_load(); - if (CHECK(!skel2, "skel_load", "skeleton open and load failed\n")) + if (!ASSERT_OK_PTR(skel2, "skel_load")) goto cleanup; prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler); memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len); - if (CHECK(err, "fd_info1", "failed %d\n", -errno)) + if (!ASSERT_OK(err, "fd_info1")) goto cleanup; id1 = prog_info.id; memset(&prog_info, 0, sizeof(prog_info)); err = bpf_obj_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len); - if (CHECK(err, "fd_info2", "failed %d\n", -errno)) + if (!ASSERT_OK(err, "fd_info2")) goto cleanup; id2 = prog_info.id; /* set initial prog attachment */ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts); - if (CHECK(err, "fd_attach", "initial prog attach failed: %d\n", err)) + if (!ASSERT_OK(err, "fd_attach")) goto cleanup; /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - CHECK(err || id0 != id1, "id1_check", - "loaded prog id %u != id1 %u, err %d", id0, id1, err); + if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val")) + goto cleanup; /* BPF link is not allowed to replace prog attachment */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); @@ -62,7 +63,7 @@ void serial_test_xdp_link(void) /* detach BPF program */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, XDP_FLAGS_REPLACE, &opts); - if (CHECK(err, "prog_detach", "failed %d\n", err)) + if (!ASSERT_OK(err, "prog_detach")) goto cleanup; /* now BPF link should attach successfully */ @@ -73,24 +74,23 @@ void serial_test_xdp_link(void) /* validate prog ID */ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (CHECK(err || id0 != id1, "id1_check", - "loaded prog id %u != id1 %u, err %d", id0, id1, err)) + if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, 
"id1_check_val")) goto cleanup; /* BPF prog attach is not allowed to replace BPF link */ opts.old_fd = prog_fd1; err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts); - if (CHECK(!err, "prog_attach_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_attach_fail")) goto cleanup; /* Can't force-update when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd2, 0); - if (CHECK(!err, "prog_update_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_update_fail")) goto cleanup; /* Can't force-detach when BPF link is active */ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0); - if (CHECK(!err, "prog_detach_fail", "unexpected success\n")) + if (!ASSERT_ERR(err, "prog_detach_fail")) goto cleanup; /* BPF link is not allowed to replace another BPF link */ @@ -110,40 +110,39 @@ void serial_test_xdp_link(void) skel2->links.xdp_handler = link; err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0); - if (CHECK(err || id0 != id2, "id2_check", - "loaded prog id %u != id2 %u, err %d", id0, id1, err)) + if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val")) goto cleanup; /* updating program under active BPF link works as expected */ err = bpf_link__update_program(link, skel1->progs.xdp_handler); - if (CHECK(err, "link_upd", "failed: %d\n", err)) + if (!ASSERT_OK(err, "link_upd")) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (CHECK(err, "link_info", "failed: %d\n", err)) + if (!ASSERT_OK(err, "link_info")) goto cleanup; - CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type", - "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP); - CHECK(link_info.prog_id != id1, "link_prog_id", - "got %u != exp %u\n", link_info.prog_id, id1); - CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex", - "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO); + ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type"); + 
ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); + ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex"); + + /* updating program under active BPF link with different type fails */ + err = bpf_link__update_program(link, skel1->progs.tc_handler); + if (!ASSERT_ERR(err, "link_upd_invalid")) + goto cleanup; err = bpf_link__detach(link); - if (CHECK(err, "link_detach", "failed %d\n", err)) + if (!ASSERT_OK(err, "link_detach")) goto cleanup; memset(&link_info, 0, sizeof(link_info)); err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &link_info, &link_info_len); - if (CHECK(err, "link_info", "failed: %d\n", err)) - goto cleanup; - CHECK(link_info.prog_id != id1, "link_prog_id", - "got %u != exp %u\n", link_info.prog_id, id1); + + ASSERT_OK(err, "link_info"); + ASSERT_EQ(link_info.prog_id, id1, "link_prog_id"); /* ifindex should be zeroed out */ - CHECK(link_info.xdp.ifindex != 0, "link_ifindex", - "got %u != exp %u\n", link_info.xdp.ifindex, 0); + ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex"); cleanup: test_xdp_link__destroy(skel1); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c index 7185bee16fe4..15a3900e4370 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c @@ -9,7 +9,7 @@ void test_xdp_perf(void) char in[128], out[128]; int err, prog_fd; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (CHECK_FAIL(err)) return; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index 94423902685d..c21e3f545371 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -49,7 +49,7 @@ int dump_unix(struct bpf_iter__unix *ctx) sock_i_ino(sk)); if (unix_sk->addr) { - if (!UNIX_ABSTRACT(unix_sk)) { + if 
(unix_sk->addr->name->sun_path[0]) { BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path); } else { /* The name of the abstract UNIX domain socket starts diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c new file mode 100644 index 000000000000..12349e4601e8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct callback_ctx { + int output; +}; + +/* These should be set by the user program */ +u32 nested_callback_nr_loops; +u32 stop_index = -1; +u32 nr_loops; +int pid; + +/* Making these global variables so that the userspace program + * can verify the output through the skeleton + */ +int nr_loops_returned; +int g_output; +int err; + +static int callback(__u32 index, void *data) +{ + struct callback_ctx *ctx = data; + + if (index >= stop_index) + return 1; + + ctx->output += index; + + return 0; +} + +static int empty_callback(__u32 index, void *data) +{ + return 0; +} + +static int nested_callback2(__u32 index, void *data) +{ + nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0); + + return 0; +} + +static int nested_callback1(__u32 index, void *data) +{ + bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0); + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int test_prog(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0); + + if (nr_loops_returned < 0) + err = nr_loops_returned; + else + g_output = data.output; + + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int prog_null_ctx(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0); + + return 0; +} + 
+SEC("fentry/__x64_sys_nanosleep") +int prog_invalid_flags(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + err = bpf_loop(nr_loops, callback, &data, 1); + + return 0; +} + +SEC("fentry/__x64_sys_nanosleep") +int prog_nested_calls(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + nr_loops_returned = 0; + bpf_loop(nr_loops, nested_callback1, &data, 0); + + g_output = data.output; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c new file mode 100644 index 000000000000..9dafdc244462 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +u32 nr_loops; +long hits; + +static int empty_callback(__u32 index, void *data) +{ + return 0; +} + +SEC("fentry/__x64_sys_getpgid") +int benchmark(void *ctx) +{ + for (int i = 0; i < 1000; i++) { + bpf_loop(nr_loops, empty_callback, NULL, 0); + + __sync_add_and_fetch(&hits, nr_loops); + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index eef5646ddb19..e0f42601be9b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -6,8 +6,6 @@ #define AF_INET6 10 #define __SO_ACCEPTCON (1 << 16) -#define UNIX_HASH_SIZE 256 -#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE) #define SOL_TCP 6 #define TCP_CONGESTION 13 diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/btf_decl_tag.c index 1792f4eda095..c88ccc53529a 100644 --- a/tools/testing/selftests/bpf/progs/tag.c +++ b/tools/testing/selftests/bpf/progs/btf_decl_tag.c @@ 
-4,10 +4,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#ifndef __has_attribute -#define __has_attribute(x) 0 -#endif - #if __has_attribute(btf_decl_tag) #define __tag1 __attribute__((btf_decl_tag("tag1"))) #define __tag2 __attribute__((btf_decl_tag("tag2"))) diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag.c b/tools/testing/selftests/bpf/progs/btf_type_tag.c new file mode 100644 index 000000000000..1d488da7e920 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_type_tag.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#if __has_attribute(btf_type_tag) +#define __tag1 __attribute__((btf_type_tag("tag1"))) +#define __tag2 __attribute__((btf_type_tag("tag2"))) +volatile const bool skip_tests = false; +#else +#define __tag1 +#define __tag2 +volatile const bool skip_tests = true; +#endif + +struct btf_type_tag_test { + int __tag1 * __tag1 __tag2 *p; +} g; + +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(sub, int x) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c new file mode 100644 index 000000000000..13499cc15c7d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/core_kern.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define ATTR __always_inline +#include "test_jhash.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 256); +} array1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, u32); + __type(value, u32); + __uint(max_entries, 256); +} array2 SEC(".maps"); + +static __noinline int randmap(int v, const struct net_device *dev) +{ + struct bpf_map *map = (struct bpf_map 
*)&array1; + int key = bpf_get_prandom_u32() & 0xff; + int *val; + + if (bpf_get_prandom_u32() & 1) + map = (struct bpf_map *)&array2; + + val = bpf_map_lookup_elem(map, &key); + if (val) + *val = bpf_get_prandom_u32() + v + dev->mtu; + + return 0; +} + +SEC("tp_btf/xdp_devmap_xmit") +int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device + *from_dev, const struct net_device *to_dev, int sent, int drops, + int err) +{ + return randmap(from_dev->ifindex, from_dev); +} + +SEC("fentry/eth_type_trans") +int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, + struct net_device *dev, unsigned short protocol) +{ + return randmap(dev->ifindex + skb->len, dev); +} + +SEC("fexit/eth_type_trans") +int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb, + struct net_device *dev, unsigned short protocol) +{ + return randmap(dev->ifindex + skb->len, dev); +} + +volatile const int never; + +struct __sk_bUfF /* it will not exist in vmlinux */ { + int len; +} __attribute__((preserve_access_index)); + +struct bpf_testmod_test_read_ctx /* it exists in bpf_testmod */ { + size_t len; +} __attribute__((preserve_access_index)); + +SEC("tc") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + void *ptr; + int ret = 0, nh_off, i = 0; + + nh_off = 14; + + /* pragma unroll doesn't work on large loops */ +#define C do { \ + ptr = data + i; \ + if (ptr + nh_off > data_end) \ + break; \ + ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \ + if (never) { \ + /* below is a dead code with unresolvable CO-RE relo */ \ + i += ((struct __sk_bUfF *)ctx)->len; \ + /* this CO-RE relo may or may not resolve + * depending on whether bpf_testmod is loaded. 
+ */ \ + i += ((struct bpf_testmod_test_read_ctx *)ctx)->len; \ + } \ + } while (0); +#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C; + C30;C30;C30; /* 90 calls */ + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c new file mode 100644 index 000000000000..f5ca142abf8f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021, Oracle and/or its affiliates. */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +unsigned int exception_triggered; +int test_pid; + +/* TRACE_EVENT(task_newtask, + * TP_PROTO(struct task_struct *p, u64 clone_flags) + */ +SEC("tp_btf/task_newtask") +int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + struct callback_head *work; + void *func; + + if (test_pid != pid) + return 0; + + /* To verify we hit an exception we dereference task->task_works->func. + * If task work has been added, + * - task->task_works is non-NULL; and + * - task->task_works->func is non-NULL also (the callback function + * must be specified for the task work. + * + * However, for a newly-created task, task->task_works is NULLed, + * so we know the exception handler triggered if task_works is + * NULL and func is NULL. 
+ */ + work = task->task_works; + func = work->func; + if (!work && !func) + exception_triggered++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 49a84a3a2306..48cd14b43741 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -73,7 +73,7 @@ int test_subprog2(struct args_subprog2 *ctx) __builtin_preserve_access_index(&skb->len)); ret = ctx->ret; - /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 + /* bpf_prog_test_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 * which randomizes upper 32 bits after BPF_ALU32 insns. * Hence after 'w0 <<= 1' upper bits of $rax are random. * That is expected and correct. Trim them. diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c new file mode 100644 index 000000000000..38034fb82530 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/find_vma.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +struct callback_ctx { + int dummy; +}; + +#define VM_EXEC 0x00000004 +#define DNAME_INLINE_LEN 32 + +pid_t target_pid = 0; +char d_iname[DNAME_INLINE_LEN] = {0}; +__u32 found_vm_exec = 0; +__u64 addr = 0; +int find_zero_ret = -1; +int find_addr_ret = -1; + +static long check_vma(struct task_struct *task, struct vm_area_struct *vma, + struct callback_ctx *data) +{ + if (vma->vm_file) + bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1, + vma->vm_file->f_path.dentry->d_iname); + + /* check for VM_EXEC */ + if (vma->vm_flags & VM_EXEC) + found_vm_exec = 1; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_getpid(void) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct callback_ctx data = {}; + + if (task->pid != 
target_pid) + return 0; + + find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0); + + /* this should return -ENOENT */ + find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0); + return 0; +} + +SEC("perf_event") +int handle_pe(void) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct callback_ctx data = {}; + + if (task->pid != target_pid) + return 0; + + find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0); + + /* In NMI, this should return -EBUSY, as the previous call is using + * the irq_work. + */ + find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c new file mode 100644 index 000000000000..b3b326b8e2d1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct callback_ctx { + int dummy; +}; + +static long write_vma(struct task_struct *task, struct vm_area_struct *vma, + struct callback_ctx *data) +{ + /* writing to vma, which is illegal */ + vma->vm_flags |= 0x55; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_getpid(void) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct callback_ctx data = {}; + + bpf_find_vma(task, 0, write_vma, &data, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail2.c b/tools/testing/selftests/bpf/progs/find_vma_fail2.c new file mode 100644 index 000000000000..9bcf3203e26b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/find_vma_fail2.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct callback_ctx { + int dummy; +}; + +static long write_task(struct 
task_struct *task, struct vm_area_struct *vma, + struct callback_ctx *data) +{ + /* writing to task, which is illegal */ + task->mm = NULL; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_getpid(void) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct callback_ctx data = {}; + + bpf_find_vma(task, 0, write_task, &data, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c new file mode 100644 index 000000000000..e0f34a55e697 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +char _license[] SEC("license") = "GPL"; + +__u64 test1_result = 0; +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1) +{ + __u64 cnt = bpf_get_func_arg_cnt(ctx); + __u64 a = 0, z = 0, ret = 0; + __s64 err; + + test1_result = cnt == 1; + + /* valid arguments */ + err = bpf_get_func_arg(ctx, 0, &a); + + /* We need to cast access to traced function argument values with + * proper type cast, because trampoline uses type specific instruction + * to save it, like for 'int a' with 32-bit mov like: + * + * mov %edi,-0x8(%rbp) + * + * so the upper 4 bytes are not zeroed. 
+ */ + test1_result &= err == 0 && ((int) a == 1); + + /* not valid argument */ + err = bpf_get_func_arg(ctx, 1, &z); + test1_result &= err == -EINVAL; + + /* return value fails in fentry */ + err = bpf_get_func_ret(ctx, &ret); + test1_result &= err == -EOPNOTSUPP; + return 0; +} + +__u64 test2_result = 0; +SEC("fexit/bpf_fentry_test2") +int BPF_PROG(test2) +{ + __u64 cnt = bpf_get_func_arg_cnt(ctx); + __u64 a = 0, b = 0, z = 0, ret = 0; + __s64 err; + + test2_result = cnt == 2; + + /* valid arguments */ + err = bpf_get_func_arg(ctx, 0, &a); + test2_result &= err == 0 && (int) a == 2; + + err = bpf_get_func_arg(ctx, 1, &b); + test2_result &= err == 0 && b == 3; + + /* not valid argument */ + err = bpf_get_func_arg(ctx, 2, &z); + test2_result &= err == -EINVAL; + + /* return value */ + err = bpf_get_func_ret(ctx, &ret); + test2_result &= err == 0 && ret == 5; + return 0; +} + +__u64 test3_result = 0; +SEC("fmod_ret/bpf_modify_return_test") +int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret) +{ + __u64 cnt = bpf_get_func_arg_cnt(ctx); + __u64 a = 0, b = 0, z = 0, ret = 0; + __s64 err; + + test3_result = cnt == 2; + + /* valid arguments */ + err = bpf_get_func_arg(ctx, 0, &a); + test3_result &= err == 0 && ((int) a == 1); + + err = bpf_get_func_arg(ctx, 1, &b); + test3_result &= err == 0 && ((int *) b == _b); + + /* not valid argument */ + err = bpf_get_func_arg(ctx, 2, &z); + test3_result &= err == -EINVAL; + + /* return value */ + err = bpf_get_func_ret(ctx, &ret); + test3_result &= err == 0 && ret == 0; + + /* change return value, it's checked in fexit_test program */ + return 1234; +} + +__u64 test4_result = 0; +SEC("fexit/bpf_modify_return_test") +int BPF_PROG(fexit_test, int _a, int *_b, int _ret) +{ + __u64 cnt = bpf_get_func_arg_cnt(ctx); + __u64 a = 0, b = 0, z = 0, ret = 0; + __s64 err; + + test4_result = cnt == 2; + + /* valid arguments */ + err = bpf_get_func_arg(ctx, 0, &a); + test4_result &= err == 0 && ((int) a == 1); + + err = bpf_get_func_arg(ctx, 
1, &b); + test4_result &= err == 0 && ((int *) b == _b); + + /* not valid argument */ + err = bpf_get_func_arg(ctx, 2, &z); + test4_result &= err == -EINVAL; + + /* return value */ + err = bpf_get_func_ret(ctx, &ret); + test4_result &= err == 0 && ret == 1234; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 95868bc7ada9..9b1f9b75d5c2 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -20,7 +20,6 @@ int sk_storage_result = -1; struct local_storage { struct inode *exec_inode; __u32 value; - struct bpf_spin_lock lock; }; struct { @@ -58,9 +57,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) bpf_get_current_task_btf(), 0, 0); if (storage) { /* Don't let an executable delete itself */ - bpf_spin_lock(&storage->lock); is_self_unlink = storage->exec_inode == victim->d_inode; - bpf_spin_unlock(&storage->lock); if (is_self_unlink) return -EPERM; } @@ -68,7 +65,7 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) return 0; } -SEC("lsm/inode_rename") +SEC("lsm.s/inode_rename") int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) @@ -89,10 +86,8 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, if (!storage) return 0; - bpf_spin_lock(&storage->lock); if (storage->value != DUMMY_STORAGE_VALUE) inode_storage_result = -1; - bpf_spin_unlock(&storage->lock); err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode); if (!err) @@ -101,7 +96,7 @@ int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, return 0; } -SEC("lsm/socket_bind") +SEC("lsm.s/socket_bind") int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, int addrlen) { @@ -117,10 +112,8 @@ int BPF_PROG(socket_bind, struct socket *sock, struct 
sockaddr *address, if (!storage) return 0; - bpf_spin_lock(&storage->lock); if (storage->value != DUMMY_STORAGE_VALUE) sk_storage_result = -1; - bpf_spin_unlock(&storage->lock); err = bpf_sk_storage_delete(&sk_storage_map, sock->sk); if (!err) @@ -129,7 +122,7 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, return 0; } -SEC("lsm/socket_post_create") +SEC("lsm.s/socket_post_create") int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, int protocol, int kern) { @@ -144,9 +137,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, if (!storage) return 0; - bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; - bpf_spin_unlock(&storage->lock); return 0; } @@ -154,7 +145,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, /* This uses the local storage to remember the inode of the binary that a * process was originally executing. */ -SEC("lsm/bprm_committed_creds") +SEC("lsm.s/bprm_committed_creds") void BPF_PROG(exec, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; @@ -166,18 +157,13 @@ void BPF_PROG(exec, struct linux_binprm *bprm) storage = bpf_task_storage_get(&task_storage_map, bpf_get_current_task_btf(), 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - if (storage) { - bpf_spin_lock(&storage->lock); + if (storage) storage->exec_inode = bprm->file->f_inode; - bpf_spin_unlock(&storage->lock); - } storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!storage) return; - bpf_spin_lock(&storage->lock); storage->value = DUMMY_STORAGE_VALUE; - bpf_spin_unlock(&storage->lock); } diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index 76e93b31c14b..717dab14322b 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ b/tools/testing/selftests/bpf/progs/loop3.c @@ -12,9 +12,9 @@ char _license[] SEC("license") = "GPL"; 
SEC("raw_tracepoint/consume_skb") -int while_true(volatile struct pt_regs* ctx) +int while_true(struct pt_regs *ctx) { - __u64 i = 0, sum = 0; + volatile __u64 i = 0, sum = 0; do { i++; sum += PT_REGS_RC(ctx); diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index b1b711d9b214..b64df94ec476 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -334,9 +334,11 @@ static inline int check_lpm_trie(void) return 1; } +#define INNER_MAX_ENTRIES 1234 + struct inner_map { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); + __uint(max_entries, INNER_MAX_ENTRIES); __type(key, __u32); __type(value, __u32); } inner_map SEC(".maps"); @@ -348,7 +350,7 @@ struct { __type(value, __u32); __array(values, struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); + __uint(max_entries, INNER_MAX_ENTRIES); __type(key, __u32); __type(value, __u32); }); @@ -360,8 +362,13 @@ static inline int check_array_of_maps(void) { struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; + struct bpf_array *inner_map; + int key = 0; VERIFY(check_default(&array_of_maps->map, map)); + inner_map = bpf_map_lookup_elem(array_of_maps, &key); + VERIFY(inner_map != 0); + VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; } @@ -382,8 +389,13 @@ static inline int check_hash_of_maps(void) { struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps; + struct bpf_htab *inner_map; + int key = 2; VERIFY(check_default(&hash_of_maps->map, map)); + inner_map = bpf_map_lookup_elem(hash_of_maps, &key); + VERIFY(inner_map != 0); + VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES); return 1; } diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 
2fb7adafb6b6..1ed28882daf3 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -159,6 +159,59 @@ struct { __uint(value_size, sizeof(long long) * 127); } stackmap SEC(".maps"); +#ifdef USE_BPF_LOOP +struct process_frame_ctx { + int cur_cpu; + int32_t *symbol_counter; + void *frame_ptr; + FrameData *frame; + PidData *pidData; + Symbol *sym; + Event *event; + bool done; +}; + +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) + +static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx) +{ + int zero = 0; + void *frame_ptr = ctx->frame_ptr; + PidData *pidData = ctx->pidData; + FrameData *frame = ctx->frame; + int32_t *symbol_counter = ctx->symbol_counter; + int cur_cpu = ctx->cur_cpu; + Event *event = ctx->event; + Symbol *sym = ctx->sym; + + if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) { + int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu; + int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym); + + if (!symbol_id) { + bpf_map_update_elem(&symbolmap, sym, &zero, 0); + symbol_id = bpf_map_lookup_elem(&symbolmap, sym); + if (!symbol_id) { + ctx->done = true; + return 1; + } + } + if (*symbol_id == new_symbol_id) + (*symbol_counter)++; + + barrier_var(i); + if (i >= STACK_MAX_LEN) + return 1; + + event->stack[i] = *symbol_id; + + event->stack_len = i + 1; + frame_ptr = frame->f_back; + } + return 0; +} +#endif /* USE_BPF_LOOP */ + #ifdef GLOBAL_FUNC __noinline #elif defined(SUBPROGS) @@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); if (symbol_counter == NULL) return 0; +#ifdef USE_BPF_LOOP + struct process_frame_ctx ctx = { + .cur_cpu = cur_cpu, + .symbol_counter = symbol_counter, + .frame_ptr = frame_ptr, + .frame = &frame, + .pidData = pidData, + .sym = &sym, + .event = event, + }; + + bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0); + if (ctx.done) + return 
0; +#else #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else #pragma clang loop unroll(full) -#endif +#endif /* NO_UNROLL */ /* Unwind python stack */ for (int i = 0; i < STACK_MAX_LEN; ++i) { if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) { @@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) frame_ptr = frame.f_back; } } +#endif /* USE_BPF_LOOP */ event->stack_complete = frame_ptr == NULL; } else { event->stack_complete = 1; diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c new file mode 100644 index 000000000000..5c2059dc01af --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#define STACK_MAX_LEN 600 +#define USE_BPF_LOOP +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c new file mode 100644 index 000000000000..18373a7df76e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <linux/types.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define STRNCMP_STR_SZ 4096 + +/* Will be updated by benchmark before program loading */ +const volatile unsigned int cmp_str_len = 1; +const char target[STRNCMP_STR_SZ]; + +long hits = 0; +char str[STRNCMP_STR_SZ]; + +char _license[] SEC("license") = "GPL"; + +static __always_inline int local_strncmp(const char *s1, unsigned int sz, + const char *s2) +{ + int ret = 0; + unsigned int i; + + for (i = 0; i < sz; i++) { + /* E.g. 
0xff > 0x31 */ + ret = (unsigned char)s1[i] - (unsigned char)s2[i]; + if (ret || !s1[i]) + break; + } + + return ret; +} + +SEC("tp/syscalls/sys_enter_getpgid") +int strncmp_no_helper(void *ctx) +{ + if (local_strncmp(str, cmp_str_len + 1, target) < 0) + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("tp/syscalls/sys_enter_getpgid") +int strncmp_helper(void *ctx) +{ + if (bpf_strncmp(str, cmp_str_len + 1, target) < 0) + __sync_add_and_fetch(&hits, 1); + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c new file mode 100644 index 000000000000..900d930d48a8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strncmp_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <stdbool.h> +#include <linux/types.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define STRNCMP_STR_SZ 8 + +const char target[STRNCMP_STR_SZ] = "EEEEEEE"; +char str[STRNCMP_STR_SZ]; +int cmp_ret = 0; +int target_pid = 0; + +const char no_str_target[STRNCMP_STR_SZ] = "12345678"; +char writable_target[STRNCMP_STR_SZ]; +unsigned int no_const_str_size = STRNCMP_STR_SZ; + +char _license[] SEC("license") = "GPL"; + +SEC("tp/syscalls/sys_enter_nanosleep") +int do_strncmp(void *ctx) +{ + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, target); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int strncmp_bad_not_const_str_size(void *ctx) +{ + /* The value of string size is not const, so will fail */ + cmp_ret = bpf_strncmp(str, no_const_str_size, target); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int strncmp_bad_writable_target(void *ctx) +{ + /* Compared target is not read-only, so will fail */ + cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, writable_target); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int 
strncmp_bad_not_null_term_target(void *ctx) +{ + /* Compared target is not null-terminated, so will fail */ + cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, no_str_target); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 60c93aee2f4a..753718595c26 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, return payload; } +#ifdef USE_BPF_LOOP +enum read_type { + READ_INT_VAR, + READ_MAP_VAR, + READ_STR_VAR, +}; + +struct read_var_ctx { + struct strobemeta_payload *data; + void *tls_base; + struct strobemeta_cfg *cfg; + void *payload; + /* value gets mutated */ + struct strobe_value_generic *value; + enum read_type type; +}; + +static int read_var_callback(__u32 index, struct read_var_ctx *ctx) +{ + switch (ctx->type) { + case READ_INT_VAR: + if (index >= STROBE_MAX_INTS) + return 1; + read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data); + break; + case READ_MAP_VAR: + if (index >= STROBE_MAX_MAPS) + return 1; + ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload); + break; + case READ_STR_VAR: + if (index >= STROBE_MAX_STRS) + return 1; + ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base, + ctx->value, ctx->data, ctx->payload); + break; + } + return 0; +} +#endif /* USE_BPF_LOOP */ + /* * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to *right after* payload ends @@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task, */ tls_base = (void *)task; +#ifdef USE_BPF_LOOP + struct read_var_ctx ctx = { + .cfg = cfg, + .tls_base = tls_base, + .value = &value, + .data = data, + .payload = payload, + }; + int err; + + ctx.type = READ_INT_VAR; + err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0); + if (err != 
STROBE_MAX_INTS) + return NULL; + + ctx.type = READ_STR_VAR; + err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_STRS) + return NULL; + + ctx.type = READ_MAP_VAR; + err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0); + if (err != STROBE_MAX_MAPS) + return NULL; +#else #ifdef NO_UNROLL #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_INTS; ++i) { read_int_var(cfg, i, tls_base, &value, data); } @@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task, #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_STRS; ++i) { payload += read_str_var(cfg, i, tls_base, &value, data, payload); } @@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task, #pragma clang loop unroll(disable) #else #pragma unroll -#endif +#endif /* NO_UNROLL */ for (int i = 0; i < STROBE_MAX_MAPS; ++i) { payload = read_map_var(cfg, i, tls_base, &value, data, payload); } +#endif /* USE_BPF_LOOP */ + /* * return pointer right after end of payload, so it's possible to * calculate exact amount of useful data that needs to be sent diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c new file mode 100644 index 000000000000..d18b992f0165 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 100 +#define STROBE_MAX_MAP_ENTRIES 20 +#define USE_BPF_LOOP +#include "strobemeta.h" diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c new file mode 100644 index 000000000000..27c27cff6a3a --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern const int bpf_prog_active __ksym; + +SEC("fentry/security_inode_getattr") +int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + void *active; + __u32 cpu; + + cpu = bpf_get_smp_processor_id(); + active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* FAIL here! 'active' points to readonly memory. bpf helpers + * that update its arguments can not write into it. + */ + bpf_d_path(path, active, sizeof(int)); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c new file mode 100644 index 000000000000..7e02b7361307 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +extern const int bpf_prog_active __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} ringbuf SEC(".maps"); + +SEC("fentry/security_inode_getattr") +int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + void *active; + u32 cpu; + + cpu = bpf_get_smp_processor_id(); + active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* FAIL here! 'active' points to 'regular' memory. It + * cannot be submitted to ring buffer. 
+ */ + bpf_ringbuf_submit(active, 0); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c new file mode 100644 index 000000000000..2180c41cd890 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Google */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + int *active; + __u32 cpu; + + cpu = bpf_get_smp_processor_id(); + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* Kernel memory obtained from bpf_{per,this}_cpu_ptr + * is read-only, should _not_ pass verification. + */ + /* WRITE_ONCE */ + *(volatile int *)active = -1; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c index 8eadbd4caf7a..5f8379aadb29 100644 --- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c +++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c @@ -38,7 +38,7 @@ int pass_handler(const void *ctx) /* tests existing symbols. */ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); if (rq) - out__existing_typed = 0; + out__existing_typed = rq->cpu; out__existing_typeless = (__u64)&bpf_prog_active; /* tests non-existent symbols. 
*/ diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 04fee08863cb..c26057ec46dc 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -448,7 +448,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end, return bpf_redirect(ifindex, 0); } -SEC("l4lb-demo") +SEC("tc") int balancer_ingress(struct __sk_buff *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index b9e2753f4f91..19e4d2071c60 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -447,7 +447,7 @@ static __noinline int process_packet(void *data, __u64 off, void *data_end, return bpf_redirect(ifindex, 0); } -SEC("l4lb-demo") +SEC("tc") int balancer_ingress(struct __sk_buff *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c new file mode 100644 index 000000000000..64c2d9ced529 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#define BPF_NO_GLOBAL_DATA +#include <bpf/bpf_helpers.h> + +char LICENSE[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} my_pid_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); +} res_map SEC(".maps"); + +volatile int my_pid_var = 0; +volatile int res_var = 0; + +SEC("tp/raw_syscalls/sys_enter") +int handle_legacy(void *ctx) +{ + int zero = 0, *my_pid, cur_pid, *my_res; + + my_pid = 
bpf_map_lookup_elem(&my_pid_map, &zero); + if (!my_pid) + return 1; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + if (cur_pid != *my_pid) + return 1; + + my_res = bpf_map_lookup_elem(&res_map, &zero); + if (!my_res) + return 1; + + if (*my_res == 0) + /* use bpf_printk() in combination with BPF_NO_GLOBAL_DATA to + * force .rodata.str1.1 section that previously caused + * problems on old kernels due to libbpf always tried to + * create a global data map for it + */ + bpf_printk("Legacy-case bpf_printk test, pid %d\n", cur_pid); + *my_res = 1; + + return *my_res; +} + +SEC("tp/raw_syscalls/sys_enter") +int handle_modern(void *ctx) +{ + int zero = 0, cur_pid; + + cur_pid = bpf_get_current_pid_tgid() >> 32; + if (cur_pid != my_pid_var) + return 1; + + if (res_var == 0) + /* we need bpf_printk() to validate libbpf logic around unused + * global maps and legacy kernels; see comment in handle_legacy() + */ + bpf_printk("Modern-case bpf_printk test, pid %d\n", cur_pid); + res_var = 1; + + return res_var; +} diff --git a/tools/testing/selftests/bpf/progs/test_log_buf.c b/tools/testing/selftests/bpf/progs/test_log_buf.c new file mode 100644 index 000000000000..199f459bd5ae --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_log_buf.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int a[4]; +const volatile int off = 4000; + +SEC("raw_tp/sys_enter") +int good_prog(const void *ctx) +{ + a[0] = (int)(long)ctx; + return a[1]; +} + +SEC("raw_tp/sys_enter") +int bad_prog(const void *ctx) +{ + /* out of bounds access */ + return a[off]; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index b5c07ae7b68f..acf073db9e8b 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -30,7 +30,7 @@ struct { 
__type(value, struct array_elem); } array_map SEC(".maps"); -SEC("map_lock_demo") +SEC("cgroup/skb") int bpf_map_lock_test(struct __sk_buff *skb) { struct hmap_elem zero = {}, *val; diff --git a/tools/testing/selftests/bpf/progs/test_prog_array_init.c b/tools/testing/selftests/bpf/progs/test_prog_array_init.c new file mode 100644 index 000000000000..2cd138356126 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_prog_array_init.c @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021 Hengqi Chen */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +const volatile pid_t my_pid = 0; +int value = 0; + +SEC("raw_tp/sys_enter") +int tailcall_1(void *ctx) +{ + value = 42; + return 0; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __array(values, int (void *)); +} prog_array_init SEC(".maps") = { + .values = { + [1] = (void *)&tailcall_1, + }, +}; + +SEC("raw_tp/sys_enter") +int entry(void *ctx) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + + if (pid != my_pid) + return 0; + + bpf_tail_call(ctx, &prog_array_init, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h index 0fcd3ff0e38a..648e8cab7a23 100644 --- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h +++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h @@ -24,7 +24,7 @@ struct { __uint(value_size, sizeof(__u32)); } map_out SEC(".maps"); -SEC("test") +SEC("tc") int _test(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 19d2465d9442..83b0aaa52ef7 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -84,6 +84,14 @@ int lookup_drop(struct 
bpf_sk_lookup *ctx) return SK_DROP; } +SEC("sk_lookup") +int check_ifindex(struct bpf_sk_lookup *ctx) +{ + if (ctx->ingress_ifindex == 1) + return SK_DROP; + return SK_PASS; +} + SEC("sk_reuseport") int reuseport_pass(struct sk_reuseport_md *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c index 8e94e5c080aa..6dc1f28fc4b6 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c @@ -68,7 +68,7 @@ static void set_task_info(struct sock *sk) } SEC("fentry/inet_csk_listen_start") -int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk, int backlog) +int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk) { set_task_info(sk); diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 1d61b36e6067..c482110cfc95 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -5,7 +5,7 @@ char _license[] SEC("license") = "GPL"; -SEC("skb_ctx") +SEC("tc") int process(struct __sk_buff *skb) { #pragma clang loop unroll(full) diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c index a408ec95cba4..eacda9fe07eb 100644 --- a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -23,7 +23,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog) if (!unix_sk) return 0; - if (!UNIX_ABSTRACT(unix_sk)) + if (unix_sk->addr->name->sun_path[0]) return 0; len = unix_sk->addr->len - sizeof(short); diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index 0d31a3b3505f..7e88309d3229 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ 
b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -45,7 +45,7 @@ struct { #define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) -SEC("spin_lock_demo") +SEC("tc") int bpf_sping_lock_test(struct __sk_buff *skb) { volatile int credit = 0, max_credit = 100, pkt_len = 64; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index a8233e7f173b..728dbd39eff0 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #ifndef PERF_MAX_STACK_DEPTH @@ -41,11 +41,11 @@ struct { /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; - char prev_comm[16]; + char prev_comm[TASK_COMM_LEN]; int prev_pid; int prev_prio; long long prev_state; - char next_comm[16]; + char next_comm[TASK_COMM_LEN]; int next_pid; int next_prio; }; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index 2c5c602c6011..e2ae049c2f85 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -244,7 +244,7 @@ static __always_inline void send_basic_event(struct sock *sk, bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY); } -SEC("dummy_tracepoint") +SEC("tp/dummy/tracepoint") int _dummy_tracepoint(struct dummy_tracepoint_args *arg) { if (!arg->sock) diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index ce6974016f53..43bd7a20cc50 100644 --- a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -1,17 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 
2017 Facebook -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { unsigned long long pad; - char prev_comm[16]; + char prev_comm[TASK_COMM_LEN]; int prev_pid; int prev_prio; long long prev_state; - char next_comm[16]; + char next_comm[TASK_COMM_LEN]; int next_pid; int next_prio; }; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index f024154c7be7..f90ffcafd1e8 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #define ATTR __always_inline #include "test_jhash.h" -SEC("scale90_inline") +SEC("tc") int balancer_ingress(struct __sk_buff *ctx) { void *data_end = (void *)(long)ctx->data_end; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c index ee7d6ac0f615..64ff32eaae92 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_link.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c @@ -10,3 +10,9 @@ int xdp_handler(struct xdp_md *xdp) { return 0; } + +SEC("tc") +int tc_handler(struct __sk_buff *skb) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 9a4d09590b3d..2098f3f27f18 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -52,3 +52,10 @@ int bench_trigger_fmodret(void *ctx) __sync_add_and_fetch(&hits, 1); return -22; } + +SEC("uprobe/self/uprobe_target") +int bench_trigger_uprobe(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py 
b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index be54b7335a76..6bf21e47882a 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -9,7 +9,15 @@ import os, sys LINUX_ROOT = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir)) -BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool') +BPFTOOL_DIR = os.getenv('BPFTOOL_DIR', + os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')) +BPFTOOL_BASHCOMP_DIR = os.getenv('BPFTOOL_BASHCOMP_DIR', + os.path.join(BPFTOOL_DIR, 'bash-completion')) +BPFTOOL_DOC_DIR = os.getenv('BPFTOOL_DOC_DIR', + os.path.join(BPFTOOL_DIR, 'Documentation')) +INCLUDE_DIR = os.getenv('INCLUDE_DIR', + os.path.join(LINUX_ROOT, 'tools/include')) + retval = 0 class BlockParser(object): @@ -242,12 +250,6 @@ class FileExtractor(object): end_marker = re.compile('}\\\\n') return self.__get_description_list(start_marker, pattern, end_marker) - def default_options(self): - """ - Return the default options contained in HELP_SPEC_OPTIONS - """ - return { '-j', '--json', '-p', '--pretty', '-d', '--debug' } - def get_bashcomp_list(self, block_name): """ Search for and parse a list of type names from a variable in bash @@ -274,7 +276,56 @@ class SourceFileExtractor(FileExtractor): defined in children classes. 
""" def get_options(self): - return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS')) + return self.get_help_list_macro('HELP_SPEC_OPTIONS') + +class MainHeaderFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's main.h + """ + filename = os.path.join(BPFTOOL_DIR, 'main.h') + + def get_common_options(self): + """ + Parse the list of common options in main.h (options that apply to all + commands), which looks to the lists of options in other source files + but has different start and end markers: + + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug} | {-l|--legacy}" + + Return a set containing all options, such as: + + {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + """ + start_marker = re.compile(f'"OPTIONS :=') + pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])') + end_marker = re.compile('#define') + + parser = InlineListParser(self.reader) + parser.search_block(start_marker) + return parser.parse(pattern, end_marker) + +class ManSubstitutionsExtractor(SourceFileExtractor): + """ + An extractor for substitutions.rst + """ + filename = os.path.join(BPFTOOL_DOC_DIR, 'substitutions.rst') + + def get_common_options(self): + """ + Parse the list of common options in substitutions.rst (options that + apply to all commands). + + Return a set containing all options, such as: + + {'-p', '-d', '--legacy', '--pretty', '--debug', '--json', '-l', '-j'} + """ + start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {') + pattern = re.compile('\*\*([\w/-]+)\*\*') + end_marker = re.compile('}$') + + parser = InlineListParser(self.reader) + parser.search_block(start_marker) + return parser.parse(pattern, end_marker) class ProgFileExtractor(SourceFileExtractor): """ @@ -350,7 +401,7 @@ class BpfHeaderExtractor(FileExtractor): """ An extractor for the UAPI BPF header. 
""" - filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h') + filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') def get_prog_types(self): return self.get_enum('bpf_prog_type') @@ -374,7 +425,7 @@ class ManProgExtractor(ManPageExtractor): """ An extractor for bpftool-prog.rst. """ - filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst') + filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-prog.rst') def get_attach_types(self): return self.get_rst_list('ATTACH_TYPE') @@ -383,7 +434,7 @@ class ManMapExtractor(ManPageExtractor): """ An extractor for bpftool-map.rst. """ - filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst') + filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-map.rst') def get_map_types(self): return self.get_rst_list('TYPE') @@ -392,7 +443,7 @@ class ManCgroupExtractor(ManPageExtractor): """ An extractor for bpftool-cgroup.rst. """ - filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst') + filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-cgroup.rst') def get_attach_types(self): return self.get_rst_list('ATTACH_TYPE') @@ -411,7 +462,7 @@ class BashcompExtractor(FileExtractor): """ An extractor for bpftool's bash completion file. 
""" - filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool') + filename = os.path.join(BPFTOOL_BASHCOMP_DIR, 'bpftool') def get_prog_attach_types(self): return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') @@ -562,7 +613,7 @@ def main(): help_cmd_options = source_info.get_options() source_info.close() - man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst')) + man_cmd_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool-' + cmd + '.rst')) man_cmd_options = man_cmd_info.get_options() man_cmd_info.close() @@ -573,13 +624,26 @@ def main(): help_main_options = source_main_info.get_options() source_main_info.close() - man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst')) + man_main_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool.rst')) man_main_options = man_main_info.get_options() man_main_info.close() verify(help_main_options, man_main_options, f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):') + # Compare common options (options that apply to all commands) + + main_hdr_info = MainHeaderFileExtractor() + source_common_options = main_hdr_info.get_common_options() + main_hdr_info.close() + + man_substitutions = ManSubstitutionsExtractor() + man_common_options = man_substitutions.get_common_options() + man_substitutions.close() + + verify(source_common_options, man_common_options, + f'Comparing common options from {main_hdr_info.filename} (HELP_SPEC_OPTIONS) and {man_substitutions.filename}:') + sys.exit(retval) if __name__ == "__main__": diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 32c7a57867da..128989bed8b7 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -72,4 +72,7 @@ #define BTF_DECL_TAG_ENC(value, type, component_idx) \ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) 
+#define BTF_TYPE_TAG_ENC(value, type) \ + BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type) + #endif /* _TEST_BTF_H */ diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 0cda61da5d39..5b8314cd77fd 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -8,6 +8,7 @@ #include "bpf_rlimit.h" #include "cgroup_helpers.h" +#include "testing_helpers.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; @@ -50,15 +51,15 @@ int main(int argc, char **argv) goto err; } - map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), - sizeof(value), 0, 0); + map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key), + sizeof(value), 0, NULL); if (map_fd < 0) { printf("Failed to create map: %s\n", strerror(errno)); goto out; } - percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, - sizeof(key), sizeof(value), 0, 0); + percpu_map_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL, + sizeof(key), sizeof(value), 0, NULL); if (percpu_map_fd < 0) { printf("Failed to create map: %s\n", strerror(errno)); goto out; @@ -66,7 +67,7 @@ int main(int argc, char **argv) prog[0].imm = percpu_map_fd; prog[7].imm = map_fd; - prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog_fd = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); if (prog_fd < 0) { diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index a8d2e9a87fbf..e00201de2890 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -7,9 +7,15 @@ /* do nothing, just make sure we can link successfully */ +static void dump_printf(void *ctx, const char *fmt, va_list args) +{ +} + int main(int argc, char *argv[]) { + struct btf_dump_opts opts = { }; struct test_core_extern *skel; + struct btf *btf; /* libbpf.h */ 
libbpf_set_print(NULL); @@ -18,7 +24,8 @@ int main(int argc, char *argv[]) bpf_prog_get_fd_by_id(0); /* btf.h */ - btf__new(NULL, 0); + btf = btf__new(NULL, 0); + btf_dump__new(btf, dump_printf, nullptr, &opts); /* BPF skeleton */ skel = test_core_extern__open_and_load(); diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 804dddd97d4c..c299d3452695 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -14,6 +14,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "testing_helpers.h" #include "bpf_rlimit.h" #define DEV_CGROUP_PROG "./dev_cgroup.o" @@ -27,7 +28,7 @@ int main(int argc, char **argv) int prog_fd, cgroup_fd; __u32 prog_cnt; - if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, + if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE, &obj, &prog_fd)) { printf("Failed to load DEV_CGROUP program\n"); goto out; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index fb5fd6841ef3..ebf68dce5504 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -45,6 +45,8 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "testing_helpers.h" + int main(int argc, char **argv) { struct bpf_object *obj; @@ -58,8 +60,8 @@ int main(int argc, char **argv) return 2; } - ret = bpf_prog_load("test_lirc_mode2_kern.o", - BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); + ret = bpf_prog_test_load("test_lirc_mode2_kern.o", + BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd); if (ret) { printf("Failed to load bpf program\n"); return 1; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 006be3963977..baa3e3ecae82 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -208,6 +208,7 @@ static void 
test_lpm_order(void) static void test_lpm_map(int keysize) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; struct bpf_lpm_trie_key *key; @@ -233,11 +234,11 @@ static void test_lpm_map(int keysize) key = alloca(sizeof(*key) + keysize); memset(key, 0, sizeof(*key) + keysize); - map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, sizeof(*key) + keysize, keysize + 1, 4096, - BPF_F_NO_PREALLOC); + &opts); assert(map >= 0); for (i = 0; i < n_nodes; ++i) { @@ -329,6 +330,7 @@ static void test_lpm_map(int keysize) static void test_lpm_ipaddr(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct bpf_lpm_trie_key *key_ipv4; struct bpf_lpm_trie_key *key_ipv6; size_t key_size_ipv4; @@ -342,14 +344,14 @@ static void test_lpm_ipaddr(void) key_ipv4 = alloca(key_size_ipv4); key_ipv6 = alloca(key_size_ipv6); - map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size_ipv4, sizeof(value), - 100, BPF_F_NO_PREALLOC); + 100, &opts); assert(map_fd_ipv4 >= 0); - map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size_ipv6, sizeof(value), - 100, BPF_F_NO_PREALLOC); + 100, &opts); assert(map_fd_ipv6 >= 0); /* Fill data some IPv4 and IPv6 address ranges */ @@ -423,6 +425,7 @@ static void test_lpm_ipaddr(void) static void test_lpm_delete(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct bpf_lpm_trie_key *key; size_t key_size; int map_fd; @@ -431,9 +434,9 @@ static void test_lpm_delete(void) key_size = sizeof(*key) + sizeof(__u32); key = alloca(key_size); - map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), - 100, BPF_F_NO_PREALLOC); + 100, &opts); 
assert(map_fd >= 0); /* Add nodes: @@ -535,6 +538,7 @@ static void test_lpm_delete(void) static void test_lpm_get_next_key(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct bpf_lpm_trie_key *key_p, *next_key_p; size_t key_size; __u32 value = 0; @@ -544,8 +548,7 @@ static void test_lpm_get_next_key(void) key_p = alloca(key_size); next_key_p = alloca(key_size); - map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value), - 100, BPF_F_NO_PREALLOC); + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts); assert(map_fd >= 0); /* empty tree. get_next_key should return ENOENT */ @@ -753,6 +756,7 @@ static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd) static void test_lpm_multi_thread(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); struct lpm_mt_test_info info[4]; size_t key_size, value_size; pthread_t thread_id[4]; @@ -762,8 +766,7 @@ static void test_lpm_multi_thread(void) /* create a trie */ value_size = sizeof(__u32); key_size = sizeof(struct bpf_lpm_trie_key) + value_size; - map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size, - 100, BPF_F_NO_PREALLOC); + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts); /* create 4 threads to test update, delete, lookup and get_next_key */ setup_lpm_mt_test_info(&info[0], map_fd); diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 7e9049fa3edf..b9f1bbbc8aba 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -28,13 +28,14 @@ static int nr_cpus; static int create_map(int map_type, int map_flags, unsigned int size) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); int map_fd; - map_fd = bpf_create_map(map_type, sizeof(unsigned long long), - sizeof(unsigned long long), size, map_flags); + map_fd = 
bpf_map_create(map_type, NULL, sizeof(unsigned long long), + sizeof(unsigned long long), size, &opts); if (map_fd == -1) - perror("bpf_create_map"); + perror("bpf_map_create"); return map_fd; } @@ -42,8 +43,6 @@ static int create_map(int map_type, int map_flags, unsigned int size) static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, void *value) { - struct bpf_load_program_attr prog; - struct bpf_create_map_attr map; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0), BPF_LD_MAP_FD(BPF_REG_1, fd), @@ -64,25 +63,13 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key, int mfd, pfd, ret, zero = 0; __u32 retval = 0; - memset(&map, 0, sizeof(map)); - map.map_type = BPF_MAP_TYPE_ARRAY; - map.key_size = sizeof(int); - map.value_size = sizeof(unsigned long long); - map.max_entries = 1; - - mfd = bpf_create_map_xattr(&map); + mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL); if (mfd < 0) return -1; insns[0].imm = mfd; - memset(&prog, 0, sizeof(prog)); - prog.prog_type = BPF_PROG_TYPE_SCHED_CLS; - prog.insns = insns; - prog.insns_cnt = ARRAY_SIZE(insns); - prog.license = "GPL"; - - pfd = bpf_load_program_xattr(&prog, NULL, 0); + pfd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", insns, ARRAY_SIZE(insns), NULL); if (pfd < 0) { close(mfd); return -1; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index c7a36a9378f8..50f7e74ca0b9 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -23,8 +23,8 @@ #include <bpf/libbpf.h> #include "bpf_util.h" -#include "bpf_rlimit.h" #include "test_maps.h" +#include "testing_helpers.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -32,15 +32,14 @@ static int skips; -static int map_flags; +static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) }; static void test_hashmap(unsigned int task, void *data) { long long key, next_key, first_key, 
value; int fd; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - 2, map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), 2, &map_opts); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); @@ -137,8 +136,7 @@ static void test_hashmap_sizes(unsigned int task, void *data) for (i = 1; i <= 512; i <<= 1) for (j = 1; j <= 1 << 18; j <<= 1) { - fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, - 2, map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, i, j, 2, &map_opts); if (fd < 0) { if (errno == ENOMEM) return; @@ -159,8 +157,8 @@ static void test_hashmap_percpu(unsigned int task, void *data) int expected_key_mask = 0; int fd, i; - fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key), - sizeof(bpf_percpu(value, 0)), 2, map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, NULL, sizeof(key), + sizeof(bpf_percpu(value, 0)), 2, &map_opts); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); @@ -271,11 +269,11 @@ static int helper_fill_hashmap(int max_entries) int i, fd, ret; long long key, value; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - max_entries, map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), + max_entries, &map_opts); CHECK(fd < 0, "failed to create hashmap", - "err: %s, flags: 0x%x\n", strerror(errno), map_flags); + "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags); for (i = 0; i < max_entries; i++) { key = i; value = key; @@ -331,8 +329,8 @@ static void test_hashmap_zero_seed(void) int i, first, second, old_flags; long long key, next_first, next_second; - old_flags = map_flags; - map_flags |= BPF_F_ZERO_SEED; + old_flags = map_opts.map_flags; + map_opts.map_flags |= BPF_F_ZERO_SEED; first = helper_fill_hashmap(3); second = helper_fill_hashmap(3); @@ -354,7 +352,7 @@ static void test_hashmap_zero_seed(void) key = next_first; } - map_flags = old_flags; + 
map_opts.map_flags = old_flags; close(first); close(second); } @@ -364,8 +362,7 @@ static void test_arraymap(unsigned int task, void *data) int key, next_key, fd; long long value; - fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), - 2, 0); + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), 2, NULL); if (fd < 0) { printf("Failed to create arraymap '%s'!\n", strerror(errno)); exit(1); @@ -420,8 +417,8 @@ static void test_arraymap_percpu(unsigned int task, void *data) BPF_DECLARE_PERCPU(long, values); int key, next_key, fd, i; - fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(bpf_percpu(values, 0)), 2, 0); + fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key), + sizeof(bpf_percpu(values, 0)), 2, NULL); if (fd < 0) { printf("Failed to create arraymap '%s'!\n", strerror(errno)); exit(1); @@ -483,8 +480,8 @@ static void test_arraymap_percpu_many_keys(void) unsigned int nr_keys = 2000; int key, fd, i; - fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key), - sizeof(bpf_percpu(values, 0)), nr_keys, 0); + fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key), + sizeof(bpf_percpu(values, 0)), nr_keys, NULL); if (fd < 0) { printf("Failed to create per-cpu arraymap '%s'!\n", strerror(errno)); @@ -515,8 +512,7 @@ static void test_devmap(unsigned int task, void *data) int fd; __u32 key, value; - fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), - 2, 0); + fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, NULL, sizeof(key), sizeof(value), 2, NULL); if (fd < 0) { printf("Failed to create devmap '%s'!\n", strerror(errno)); exit(1); @@ -530,8 +526,7 @@ static void test_devmap_hash(unsigned int task, void *data) int fd; __u32 key, value; - fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value), - 2, 0); + fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP_HASH, NULL, sizeof(key), sizeof(value), 2, NULL); if (fd < 0) { printf("Failed to create devmap_hash '%s'!\n", 
strerror(errno)); exit(1); @@ -551,14 +546,12 @@ static void test_queuemap(unsigned int task, void *data) vals[i] = rand(); /* Invalid key size */ - fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, - map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 4, sizeof(val), MAP_SIZE, &map_opts); assert(fd < 0 && errno == EINVAL); - fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, - map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0, sizeof(val), MAP_SIZE, &map_opts); /* Queue map does not support BPF_F_NO_PREALLOC */ - if (map_flags & BPF_F_NO_PREALLOC) { + if (map_opts.map_flags & BPF_F_NO_PREALLOC) { assert(fd < 0 && errno == EINVAL); return; } @@ -609,14 +602,12 @@ static void test_stackmap(unsigned int task, void *data) vals[i] = rand(); /* Invalid key size */ - fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, - map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 4, sizeof(val), MAP_SIZE, &map_opts); assert(fd < 0 && errno == EINVAL); - fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, - map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 0, sizeof(val), MAP_SIZE, &map_opts); /* Stack map does not support BPF_F_NO_PREALLOC */ - if (map_flags & BPF_F_NO_PREALLOC) { + if (map_opts.map_flags & BPF_F_NO_PREALLOC) { assert(fd < 0 && errno == EINVAL); return; } @@ -743,9 +734,9 @@ static void test_sockmap(unsigned int tasks, void *data) } /* Test sockmap with connected sockets */ - fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, + fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key), sizeof(value), - 6, 0); + 6, NULL); if (fd < 0) { if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) { printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n", @@ -830,21 +821,21 @@ static void test_sockmap(unsigned int tasks, void *data) } /* Load SK_SKB program and Attach */ - err = bpf_prog_load(SOCKMAP_PARSE_PROG, + err = bpf_prog_test_load(SOCKMAP_PARSE_PROG, BPF_PROG_TYPE_SK_SKB, 
&obj, &parse_prog); if (err) { printf("Failed to load SK_SKB parse prog\n"); goto out_sockmap; } - err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG, + err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG, BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog); if (err) { printf("Failed to load SK_SKB msg prog\n"); goto out_sockmap; } - err = bpf_prog_load(SOCKMAP_VERDICT_PROG, + err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG, BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog); if (err) { printf("Failed to load SK_SKB verdict prog\n"); @@ -1167,8 +1158,7 @@ static void test_map_in_map(void) obj = bpf_object__open(MAPINMAP_PROG); - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), - 2, 0); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 2, NULL); if (fd < 0) { printf("Failed to create hashmap '%s'!\n", strerror(errno)); exit(1); @@ -1314,8 +1304,8 @@ static void test_map_large(void) } key; int fd, i, value; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - MAP_SIZE, map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), + MAP_SIZE, &map_opts); if (fd < 0) { printf("Failed to create large map '%s'!\n", strerror(errno)); exit(1); @@ -1468,8 +1458,8 @@ static void test_map_parallel(void) int i, fd, key = 0, value = 0; int data[2]; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - MAP_SIZE, map_flags); + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), + MAP_SIZE, &map_opts); if (fd < 0) { printf("Failed to create map for parallel test '%s'!\n", strerror(errno)); @@ -1517,9 +1507,13 @@ static void test_map_parallel(void) static void test_map_rdonly(void) { int fd, key = 0, value = 0; + __u32 old_flags; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - MAP_SIZE, map_flags | BPF_F_RDONLY); + old_flags = map_opts.map_flags; + map_opts.map_flags |= BPF_F_RDONLY; + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), + MAP_SIZE, 
&map_opts); + map_opts.map_flags = old_flags; if (fd < 0) { printf("Failed to create map for read only test '%s'!\n", strerror(errno)); @@ -1542,9 +1536,13 @@ static void test_map_rdonly(void) static void test_map_wronly_hash(void) { int fd, key = 0, value = 0; + __u32 old_flags; - fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), - MAP_SIZE, map_flags | BPF_F_WRONLY); + old_flags = map_opts.map_flags; + map_opts.map_flags |= BPF_F_WRONLY; + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), + MAP_SIZE, &map_opts); + map_opts.map_flags = old_flags; if (fd < 0) { printf("Failed to create map for write only test '%s'!\n", strerror(errno)); @@ -1566,13 +1564,17 @@ static void test_map_wronly_hash(void) static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type) { int fd, value = 0; + __u32 old_flags; + assert(map_type == BPF_MAP_TYPE_QUEUE || map_type == BPF_MAP_TYPE_STACK); - fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE, - map_flags | BPF_F_WRONLY); + old_flags = map_opts.map_flags; + map_opts.map_flags |= BPF_F_WRONLY; + fd = bpf_map_create(map_type, NULL, 0, sizeof(value), MAP_SIZE, &map_opts); + map_opts.map_flags = old_flags; /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */ - if (map_flags & BPF_F_NO_PREALLOC) { + if (map_opts.map_flags & BPF_F_NO_PREALLOC) { assert(fd < 0 && errno == EINVAL); return; } @@ -1699,8 +1701,8 @@ static void test_reuseport_array(void) __u32 fds_idx = 0; int fd; - map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - sizeof(__u32), sizeof(__u64), array_size, 0); + map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL, + sizeof(__u32), sizeof(__u64), array_size, NULL); CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); @@ -1836,8 +1838,8 @@ static void test_reuseport_array(void) close(map_fd); /* Test 32 bit fd */ - map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, - sizeof(__u32), sizeof(__u32), array_size, 0); 
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL, + sizeof(__u32), sizeof(__u32), array_size, NULL); CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64, @@ -1895,10 +1897,10 @@ int main(void) libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - map_flags = 0; + map_opts.map_flags = 0; run_all_tests(); - map_flags = BPF_F_NO_PREALLOC; + map_opts.map_flags = BPF_F_NO_PREALLOC; run_all_tests(); #define DEFINE_TEST(name) test_##name(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c65986bd9d07..2ecb73a65206 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -4,7 +4,6 @@ #define _GNU_SOURCE #include "test_progs.h" #include "cgroup_helpers.h" -#include "bpf_rlimit.h" #include <argp.h> #include <pthread.h> #include <sched.h> @@ -473,11 +472,11 @@ static struct prog_test_def prog_test_defs[] = { #include <prog_tests/tests.h> #undef DEFINE_TEST }; -const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); +static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); const char *argp_program_version = "test_progs 0.1"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; -const char argp_program_doc[] = "BPF selftests test runner"; +static const char argp_program_doc[] = "BPF selftests test runner"; enum ARG_KEYS { ARG_TEST_NUM = 'n', @@ -939,7 +938,7 @@ static void *dispatch_thread(void *ctx) { struct dispatch_data *data = ctx; int sock_fd; - FILE *log_fd = NULL; + FILE *log_fp = NULL; sock_fd = data->sock_fd; @@ -1002,8 +1001,8 @@ static void *dispatch_thread(void *ctx) /* collect all logs */ if (msg_test_done.test_done.have_log) { - log_fd = open_memstream(&result->log_buf, &result->log_cnt); - if (!log_fd) + log_fp = open_memstream(&result->log_buf, &result->log_cnt); + if (!log_fp) goto error; while (true) { @@ -1014,12 +1013,12 @@ static void 
*dispatch_thread(void *ctx) if (msg_log.type != MSG_TEST_LOG) goto error; - fprintf(log_fd, "%s", msg_log.test_log.log_buf); + fprintf(log_fp, "%s", msg_log.test_log.log_buf); if (msg_log.test_log.is_last) break; } - fclose(log_fd); - log_fd = NULL; + fclose(log_fp); + log_fp = NULL; } /* output log */ { @@ -1045,8 +1044,8 @@ error: if (env.debug) fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno)); - if (log_fd) - fclose(log_fd); + if (log_fp) + fclose(log_fp); done: { struct msg msg_exit; @@ -1198,11 +1197,11 @@ static int server_main(void) env.sub_succ_cnt += result->sub_succ_cnt; } + print_all_error_logs(); + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); - print_all_error_logs(); - /* reap all workers */ for (i = 0; i < env.workers; i++) { int wstatus, pid; @@ -1342,7 +1341,6 @@ int main(int argc, char **argv) /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - libbpf_set_print(libbpf_print_fn); srand(time(NULL)); @@ -1484,11 +1482,11 @@ int main(int argc, char **argv) if (env.list_test_names) goto out; + print_all_error_logs(); + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); - print_all_error_logs(); - close(env.saved_netns_fd); out: if (!env.list_test_names && env.has_testmod) diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 9613f7538840..fe10f8134278 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -35,18 +35,21 @@ struct sock_test { /* Endpoint to bind() to */ const char *ip; unsigned short port; + unsigned short port_retry; /* Expected test result */ enum { LOAD_REJECT, ATTACH_REJECT, BIND_REJECT, SUCCESS, + RETRY_SUCCESS, + RETRY_REJECT } result; }; static struct sock_test tests[] = { { - "bind4 load with invalid access: src_ip6", + .descr 
= "bind4 load with invalid access: src_ip6", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -54,16 +57,12 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = LOAD_REJECT, }, { - "bind4 load with invalid access: mark", + .descr = "bind4 load with invalid access: mark", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -71,16 +70,12 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = LOAD_REJECT, }, { - "bind6 load with invalid access: src_ip4", + .descr = "bind6 load with invalid access: src_ip4", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -88,16 +83,12 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .result = LOAD_REJECT, }, { - "sock_create load with invalid access: src_port", + .descr = "sock_create load with invalid access: src_port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -105,128 +96,106 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET_SOCK_CREATE, - BPF_CGROUP_INET_SOCK_CREATE, - 0, - 0, - NULL, - 0, - LOAD_REJECT, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = 
BPF_CGROUP_INET_SOCK_CREATE, + .result = LOAD_REJECT, }, { - "sock_create load w/o expected_attach_type (compat mode)", + .descr = "sock_create load w/o expected_attach_type (compat mode)", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - 0, - BPF_CGROUP_INET_SOCK_CREATE, - AF_INET, - SOCK_STREAM, - "127.0.0.1", - 8097, - SUCCESS, + .expected_attach_type = 0, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 8097, + .result = SUCCESS, }, { - "sock_create load w/ expected_attach_type", + .descr = "sock_create load w/ expected_attach_type", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET_SOCK_CREATE, - BPF_CGROUP_INET_SOCK_CREATE, - AF_INET, - SOCK_STREAM, - "127.0.0.1", - 8097, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 8097, + .result = SUCCESS, }, { - "attach type mismatch bind4 vs bind6", + .descr = "attach type mismatch bind4 vs bind6", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .result = ATTACH_REJECT, }, { - "attach type mismatch bind6 vs bind4", + .descr = "attach type mismatch bind6 vs bind4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = ATTACH_REJECT, }, { - "attach type mismatch default vs bind4", + .descr = "attach type mismatch default vs bind4", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - 0, - BPF_CGROUP_INET4_POST_BIND, - 0, - 0, - NULL, - 0, - 
ATTACH_REJECT, + .expected_attach_type = 0, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .result = ATTACH_REJECT, }, { - "attach type mismatch bind6 vs sock_create", + .descr = "attach type mismatch bind6 vs sock_create", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET_SOCK_CREATE, - 0, - 0, - NULL, - 0, - ATTACH_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .result = ATTACH_REJECT, }, { - "bind4 reject all", + .descr = "bind4 reject all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - AF_INET, - SOCK_STREAM, - "0.0.0.0", - 0, - BIND_REJECT, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "0.0.0.0", + .result = BIND_REJECT, }, { - "bind6 reject all", + .descr = "bind6 reject all", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - AF_INET6, - SOCK_STREAM, - "::", - 0, - BIND_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::", + .result = BIND_REJECT, }, { - "bind6 deny specific IP & port", + .descr = "bind6 deny specific IP & port", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -247,16 +216,16 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - AF_INET6, - SOCK_STREAM, - "::1", - 8193, - BIND_REJECT, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::1", + .port = 8193, + .result = BIND_REJECT, }, { - "bind4 allow specific IP & port", + .descr = "bind4 allow specific IP & port", 
.insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), @@ -277,41 +246,132 @@ static struct sock_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - AF_INET, - SOCK_STREAM, - "127.0.0.1", - 4098, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 4098, + .result = SUCCESS, }, { - "bind4 allow all", + .descr = "bind4 deny specific IP & port of TCP, and retry", .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip4)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x7F000001), 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET4_POST_BIND, - BPF_CGROUP_INET4_POST_BIND, - AF_INET, - SOCK_STREAM, - "0.0.0.0", - 0, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "127.0.0.1", + .port = 4098, + .port_retry = 5000, + .result = RETRY_SUCCESS, }, { - "bind6 allow all", + .descr = "bind4 deny specific IP & port of UDP, and retry", .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip4)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x7F000001), 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else 
return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - BPF_CGROUP_INET6_POST_BIND, - BPF_CGROUP_INET6_POST_BIND, - AF_INET6, - SOCK_STREAM, - "::", - 0, - SUCCESS, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_DGRAM, + .ip = "127.0.0.1", + .port = 4098, + .port_retry = 5000, + .result = RETRY_SUCCESS, + }, + { + .descr = "bind6 deny specific IP & port, and retry", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* if (ip == expected && port == expected) */ + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_ip6[3])), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x00000001), 4), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock, src_port)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), + + /* return DENY; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_A(1), + + /* else return ALLOW; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::1", + .port = 8193, + .port_retry = 9000, + .result = RETRY_SUCCESS, + }, + { + .descr = "bind4 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, + .attach_type = BPF_CGROUP_INET4_POST_BIND, + .domain = AF_INET, + .type = SOCK_STREAM, + .ip = "0.0.0.0", + .result = SUCCESS, + }, + { + .descr = "bind6 allow all", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, + .attach_type = BPF_CGROUP_INET6_POST_BIND, + .domain = AF_INET6, + .type = SOCK_STREAM, + .ip = "::", + .result = SUCCESS, }, }; @@ -328,18 +388,17 @@ static size_t probe_prog_length(const struct bpf_insn *fp) static int load_sock_prog(const struct bpf_insn *prog, enum bpf_attach_type attach_type) { - struct 
bpf_load_program_attr attr; - int ret; - - memset(&attr, 0, sizeof(struct bpf_load_program_attr)); - attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; - attr.expected_attach_type = attach_type; - attr.insns = prog; - attr.insns_cnt = probe_prog_length(attr.insns); - attr.license = "GPL"; - attr.log_level = 2; - - ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + LIBBPF_OPTS(bpf_prog_load_opts, opts); + int ret, insn_cnt; + + insn_cnt = probe_prog_length(prog); + + opts.expected_attach_type = attach_type; + opts.log_buf = bpf_log_buf; + opts.log_size = BPF_LOG_BUF_SIZE; + opts.log_level = 2; + + ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts); if (verbose && ret < 0) fprintf(stderr, "%s\n", bpf_log_buf); @@ -352,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd, return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); } -static int bind_sock(int domain, int type, const char *ip, unsigned short port) +static int bind_sock(int domain, int type, const char *ip, + unsigned short port, unsigned short port_retry) { struct sockaddr_storage addr; struct sockaddr_in6 *addr6; struct sockaddr_in *addr4; int sockfd = -1; socklen_t len; - int err = 0; + int res = SUCCESS; sockfd = socket(domain, type, 0); if (sockfd < 0) @@ -385,21 +445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port) goto err; } - if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) - goto err; + if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { + /* sys_bind() may fail for different reasons, errno has to be + * checked to confirm that BPF program rejected it. 
+ */ + if (errno != EPERM) + goto err; + if (port_retry) + goto retry; + res = BIND_REJECT; + goto out; + } goto out; +retry: + if (domain == AF_INET) + addr4->sin_port = htons(port_retry); + else + addr6->sin6_port = htons(port_retry); + if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) { + if (errno != EPERM) + goto err; + res = RETRY_REJECT; + } else { + res = RETRY_SUCCESS; + } + goto out; err: - err = -1; + res = -1; out: close(sockfd); - return err; + return res; } static int run_test_case(int cgfd, const struct sock_test *test) { int progfd = -1; int err = 0; + int res; printf("Test case: %s .. ", test->descr); progfd = load_sock_prog(test->insns, test->expected_attach_type); @@ -417,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test) goto err; } - if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) { - /* sys_bind() may fail for different reasons, errno has to be - * checked to confirm that BPF program rejected it. - */ - if (test->result == BIND_REJECT && errno == EPERM) - goto out; - else - goto err; - } - + res = bind_sock(test->domain, test->type, test->ip, test->port, + test->port_retry); + if (res > 0 && test->result == res) + goto out; - if (test->result != SUCCESS) - goto err; - - goto out; err: err = -1; out: diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index aa3f185fcb89..f0c8d05ba6d1 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -645,17 +645,14 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port, static int load_insns(const struct sock_addr_test *test, const struct bpf_insn *insns, size_t insns_cnt) { - struct bpf_load_program_attr load_attr; + LIBBPF_OPTS(bpf_prog_load_opts, opts); int ret; - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; - load_attr.expected_attach_type = 
test->expected_attach_type; - load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; - load_attr.license = "GPL"; + opts.expected_attach_type = test->expected_attach_type; + opts.log_buf = bpf_log_buf; + opts.log_size = BPF_LOG_BUF_SIZE; - ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts); if (ret < 0 && test->expected_result != LOAD_REJECT) { log_err(">>> Loading program error.\n" ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); @@ -666,23 +663,36 @@ static int load_insns(const struct sock_addr_test *test, static int load_path(const struct sock_addr_test *test, const char *path) { - struct bpf_prog_load_attr attr; struct bpf_object *obj; - int prog_fd; + struct bpf_program *prog; + int err; - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = path; - attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; - attr.expected_attach_type = test->expected_attach_type; - attr.prog_flags = BPF_F_TEST_RND_HI32; + obj = bpf_object__open_file(path, NULL); + err = libbpf_get_error(obj); + if (err) { + log_err(">>> Opening BPF object (%s) error.\n", path); + return -1; + } + + prog = bpf_object__next_program(obj, NULL); + if (!prog) + goto err_out; - if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { + bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); + bpf_program__set_expected_attach_type(prog, test->expected_attach_type); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + + err = bpf_object__load(obj); + if (err) { if (test->expected_result != LOAD_REJECT) log_err(">>> Loading program (%s) error.\n", path); - return -1; + goto err_out; } - return prog_fd; + return bpf_program__fd(prog); +err_out: + bpf_object__close(obj); + return -1; } static int bind4_prog_load(const struct sock_addr_test *test) diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c deleted file mode 100644 index 
47e132726203..000000000000 --- a/tools/testing/selftests/bpf/test_stub.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* Copyright (C) 2019 Netronome Systems, Inc. */ - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> -#include <string.h> - -int extra_prog_load_log_flags = 0; - -int bpf_prog_test_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_prog_load_attr attr; - - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = file; - attr.prog_type = type; - attr.expected_attach_type = 0; - attr.prog_flags = BPF_F_TEST_RND_HI32; - attr.log_level = extra_prog_load_log_flags; - - return bpf_prog_load_xattr(&attr, pobj, prog_fd); -} - -int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, const char *license, - __u32 kern_version, char *log_buf, - size_t log_buf_sz) -{ - struct bpf_load_program_attr load_attr; - - memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = 0; - load_attr.name = NULL; - load_attr.insns = insns; - load_attr.insns_cnt = insns_cnt; - load_attr.license = license; - load_attr.kern_version = kern_version; - load_attr.prog_flags = BPF_F_TEST_RND_HI32; - load_attr.log_level = extra_prog_load_log_flags; - - return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); -} diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a3bb6d399daa..4f6cf833b522 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -17,6 +17,7 @@ #include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" +#include "testing_helpers.h" #define CG_PATH "/foo" #define MAX_INSNS 512 @@ -1435,14 +1436,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test, const char *sysctl_path) { struct bpf_insn *prog = test->insns; - struct 
bpf_load_program_attr attr; - int ret; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + int ret, insn_cnt; - memset(&attr, 0, sizeof(struct bpf_load_program_attr)); - attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; - attr.insns = prog; - attr.insns_cnt = probe_prog_length(attr.insns); - attr.license = "GPL"; + insn_cnt = probe_prog_length(prog); if (test->fixup_value_insn) { char buf[128]; @@ -1465,7 +1462,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test, return -1; } - ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + opts.log_buf = bpf_log_buf; + opts.log_size = BPF_LOG_BUF_SIZE; + + ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts); if (ret < 0 && test->result != LOAD_REJECT) { log_err(">>> Loading program error.\n" ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); @@ -1476,15 +1476,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test, static int load_sysctl_prog_file(struct sysctl_test *test) { - struct bpf_prog_load_attr attr; struct bpf_object *obj; int prog_fd; - memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); - attr.file = test->prog_file; - attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL; - - if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) { + if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) { if (test->result != LOAD_REJECT) log_err(">>> Loading program (%s) error.\n", test->prog_file); diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c index 6272c784ca2a..0851c42ee31c 100644 --- a/tools/testing/selftests/bpf/test_tag.c +++ b/tools/testing/selftests/bpf/test_tag.c @@ -21,6 +21,7 @@ #include "../../../include/linux/filter.h" #include "bpf_rlimit.h" +#include "testing_helpers.h" static struct bpf_insn prog[BPF_MAXINSNS]; @@ -57,7 +58,7 @@ static int bpf_try_load_prog(int insns, int fd_map, int fd_prog; bpf_filler(insns, fd_map); - fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0, 
+ fd_prog = bpf_test_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0, NULL, 0); assert(fd_prog > 0); if (fd_map > 0) @@ -184,11 +185,12 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map, int main(void) { + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); uint32_t tests = 0; int i, fd_map; - fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), - sizeof(int), 1, BPF_F_NO_PREALLOC); + fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), 1, &opts); assert(fd_map > 0); for (i = 0; i < 5; i++) { diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 4a39304cc5a6..4c5114765b23 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -25,6 +25,7 @@ #include "test_tcpnotify.h" #include "trace_helpers.h" +#include "testing_helpers.h" #define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) @@ -71,7 +72,6 @@ int main(int argc, char **argv) { const char *file = "test_tcpnotify_kern.o"; struct bpf_map *perf_map, *global_map; - struct perf_buffer_opts pb_opts = {}; struct tcpnotify_globals g = {0}; struct perf_buffer *pb = NULL; const char *cg_path = "/foo"; @@ -92,7 +92,7 @@ int main(int argc, char **argv) if (cg_fd < 0) goto err; - if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + if (bpf_prog_test_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; } @@ -116,8 +116,7 @@ int main(int argc, char **argv) return -1; } - pb_opts.sample_cb = dummyfn; - pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); + pb = perf_buffer__new(bpf_map__fd(perf_map), 8, dummyfn, NULL, NULL, NULL); if (!pb) goto err; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d3bf83d5c6cf..76cd903117af 100644 --- 
a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -41,7 +41,6 @@ # define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1 # endif #endif -#include "bpf_rlimit.h" #include "bpf_rand.h" #include "bpf_util.h" #include "test_btf.h" @@ -462,11 +461,11 @@ static int __create_map(uint32_t type, uint32_t size_key, uint32_t size_value, uint32_t max_elem, uint32_t extra_flags) { + LIBBPF_OPTS(bpf_map_create_opts, opts); int fd; - fd = bpf_create_map(type, size_key, size_value, max_elem, - (type == BPF_MAP_TYPE_HASH ? - BPF_F_NO_PREALLOC : 0) | extra_flags); + opts.map_flags = (type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0) | extra_flags; + fd = bpf_map_create(type, NULL, size_key, size_value, max_elem, &opts); if (fd < 0) { if (skip_unsupported_map(type)) return -1; @@ -499,8 +498,7 @@ static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret) BPF_EXIT_INSN(), }; - return bpf_load_program(prog_type, prog, - ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL); } static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd, @@ -515,8 +513,7 @@ static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd, BPF_EXIT_INSN(), }; - return bpf_load_program(prog_type, prog, - ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL); } static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, @@ -524,8 +521,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, { int mfd, p1fd, p2fd, p3fd; - mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), - sizeof(int), max_elem, 0); + mfd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, NULL, sizeof(int), + sizeof(int), max_elem, NULL); if (mfd < 0) { if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY)) return -1; @@ -555,10 +552,11 @@ err: static int create_map_in_map(void) { + 
LIBBPF_OPTS(bpf_map_create_opts, opts); int inner_map_fd, outer_map_fd; - inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), - sizeof(int), 1, 0); + inner_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), + sizeof(int), 1, NULL); if (inner_map_fd < 0) { if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY)) return -1; @@ -566,8 +564,9 @@ static int create_map_in_map(void) return inner_map_fd; } - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, - sizeof(int), inner_map_fd, 1, 0); + opts.inner_map_fd = inner_map_fd; + outer_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, + sizeof(int), sizeof(int), 1, &opts); if (outer_map_fd < 0) { if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS)) return -1; @@ -586,8 +585,8 @@ static int create_cgroup_storage(bool percpu) BPF_MAP_TYPE_CGROUP_STORAGE; int fd; - fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), - TEST_DATA_LEN, 0, 0); + fd = bpf_map_create(type, NULL, sizeof(struct bpf_cgroup_storage_key), + TEST_DATA_LEN, 0, NULL); if (fd < 0) { if (skip_unsupported_map(type)) return -1; @@ -654,7 +653,7 @@ static int load_btf(void) memcpy(ptr, btf_str_sec, hdr.str_len); ptr += hdr.str_len; - btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0); + btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL); free(raw_btf); if (btf_fd < 0) return -1; @@ -663,22 +662,17 @@ static int load_btf(void) static int create_map_spin_lock(void) { - struct bpf_create_map_attr attr = { - .name = "test_map", - .map_type = BPF_MAP_TYPE_ARRAY, - .key_size = 4, - .value_size = 8, - .max_entries = 1, + LIBBPF_OPTS(bpf_map_create_opts, opts, .btf_key_type_id = 1, .btf_value_type_id = 3, - }; + ); int fd, btf_fd; btf_fd = load_btf(); if (btf_fd < 0) return -1; - attr.btf_fd = btf_fd; - fd = bpf_create_map_xattr(&attr); + opts.btf_fd = btf_fd; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 8, 1, &opts); if (fd < 0) printf("Failed to create map with spin_lock\n"); return fd; @@ 
-686,24 +680,19 @@ static int create_map_spin_lock(void) static int create_sk_storage_map(void) { - struct bpf_create_map_attr attr = { - .name = "test_map", - .map_type = BPF_MAP_TYPE_SK_STORAGE, - .key_size = 4, - .value_size = 8, - .max_entries = 0, + LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC, .btf_key_type_id = 1, .btf_value_type_id = 3, - }; + ); int fd, btf_fd; btf_fd = load_btf(); if (btf_fd < 0) return -1; - attr.btf_fd = btf_fd; - fd = bpf_create_map_xattr(&attr); - close(attr.btf_fd); + opts.btf_fd = btf_fd; + fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "test_map", 4, 8, 0, &opts); + close(opts.btf_fd); if (fd < 0) printf("Failed to create sk_storage_map\n"); return fd; @@ -711,22 +700,18 @@ static int create_sk_storage_map(void) static int create_map_timer(void) { - struct bpf_create_map_attr attr = { - .name = "test_map", - .map_type = BPF_MAP_TYPE_ARRAY, - .key_size = 4, - .value_size = 16, - .max_entries = 1, + LIBBPF_OPTS(bpf_map_create_opts, opts, .btf_key_type_id = 1, .btf_value_type_id = 5, - }; + ); int fd, btf_fd; btf_fd = load_btf(); if (btf_fd < 0) return -1; - attr.btf_fd = btf_fd; - fd = bpf_create_map_xattr(&attr); + + opts.btf_fd = btf_fd; + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 16, 1, &opts); if (fd < 0) printf("Failed to create map with timer\n"); return fd; @@ -1089,7 +1074,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; - struct bpf_load_program_attr attr; + LIBBPF_OPTS(bpf_prog_load_opts, opts); int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; @@ -1129,32 +1114,34 @@ static void do_test_single(struct bpf_test *test, bool unpriv, test->result_unpriv : test->result; expected_err = unpriv && test->errstr_unpriv ? 
test->errstr_unpriv : test->errstr; - memset(&attr, 0, sizeof(attr)); - attr.prog_type = prog_type; - attr.expected_attach_type = test->expected_attach_type; - attr.insns = prog; - attr.insns_cnt = prog_len; - attr.license = "GPL"; + + opts.expected_attach_type = test->expected_attach_type; if (verbose) - attr.log_level = 1; + opts.log_level = 1; else if (expected_ret == VERBOSE_ACCEPT) - attr.log_level = 2; + opts.log_level = 2; else - attr.log_level = 4; - attr.prog_flags = pflags; + opts.log_level = 4; + opts.prog_flags = pflags; if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { - attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, - attr.expected_attach_type); - if (attr.attach_btf_id < 0) { + int attach_btf_id; + + attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, + opts.expected_attach_type); + if (attach_btf_id < 0) { printf("FAIL\nFailed to find BTF ID for '%s'!\n", test->kfunc); (*errors)++; return; } + + opts.attach_btf_id = attach_btf_id; } - fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); + opts.log_buf = bpf_vlog; + opts.log_size = sizeof(bpf_vlog); + fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); saved_errno = errno; /* BPF_PROG_TYPE_TRACING requires more setup and @@ -1407,6 +1394,9 @@ int main(int argc, char **argv) return EXIT_FAILURE; } + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + bpf_semi_rand_init(); return do_test(unpriv, from, to); } diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 800d503e5cb4..795b6798ccee 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ /* Copyright (C) 2020 Facebook, Inc. 
*/ #include <stdlib.h> +#include <string.h> #include <errno.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> #include "testing_helpers.h" int parse_num_list(const char *s, bool **num_set, int *num_set_len) @@ -78,3 +82,61 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) } return info->prog_id; } + +int extra_prog_load_log_flags = 0; + +int bpf_prog_test_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, + .kernel_log_level = extra_prog_load_log_flags, + ); + struct bpf_object *obj; + struct bpf_program *prog; + __u32 flags; + int err; + + obj = bpf_object__open_file(file, &opts); + if (!obj) + return -errno; + + prog = bpf_object__next_program(obj, NULL); + if (!prog) { + err = -ENOENT; + goto err_out; + } + + if (type != BPF_PROG_TYPE_UNSPEC) + bpf_program__set_type(prog, type); + + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + bpf_program__set_flags(prog, flags); + + err = bpf_object__load(obj); + if (err) + goto err_out; + + *pobj = obj; + *prog_fd = bpf_program__fd(prog); + + return 0; +err_out: + bpf_object__close(obj); + return err; +} + +int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .kern_version = kern_version, + .prog_flags = BPF_F_TEST_RND_HI32, + .log_level = extra_prog_load_log_flags, + .log_buf = log_buf, + .log_size = log_buf_sz, + ); + + return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts); +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index d4f8e749611b..f46ebc476ee8 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -6,3 +6,9 @@ int parse_num_list(const char *s, bool **set, int *set_len); __u32 
link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); +int bpf_prog_test_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); +int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t insns_cnt, const char *license, + __u32 kern_version, char *log_buf, + size_t log_buf_sz); diff --git a/tools/testing/selftests/bpf/verifier/btf_ctx_access.c b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c new file mode 100644 index 000000000000..6340db6b46dc --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/btf_ctx_access.c @@ -0,0 +1,12 @@ +{ + "btf_ctx_access accept", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 8), /* load 2nd argument value (int pointer) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "bpf_modify_return_test", +}, diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c index d78627be060f..a2b006e2fd06 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c @@ -229,6 +229,24 @@ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sk_lookup, local_port)), + /* 1-byte read from ingress_ifindex field */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex)), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex) + 3), + /* 2-byte read from ingress_ifindex field */ + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex)), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct 
bpf_sk_lookup, ingress_ifindex) + 2), + /* 4-byte read from ingress_ifindex field */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex)), + /* 8-byte read from sk field */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sk_lookup, sk)), @@ -351,6 +369,20 @@ .expected_attach_type = BPF_SK_LOOKUP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "invalid 8-byte read from bpf_sk_lookup ingress_ifindex field", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct bpf_sk_lookup, ingress_ifindex)), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, /* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */ { "invalid 4-byte read from bpf_sk_lookup sk field", diff --git a/tools/testing/selftests/bpf/verifier/ringbuf.c b/tools/testing/selftests/bpf/verifier/ringbuf.c new file mode 100644 index 000000000000..b64d33e4833c --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/ringbuf.c @@ -0,0 +1,95 @@ +{ + "ringbuf: invalid reservation offset 1", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, 
BPF_REG_7), + /* add invalid offset to reserved ringbuf memory */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xcafe), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "dereference of modified alloc_mem ptr R1", +}, +{ + "ringbuf: invalid reservation offset 2", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* spill R6(mem) into the stack */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + /* fill it back in R7 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, -8), + /* add invalid offset to reserved ringbuf memory */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 0xcafe), + /* should be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "R7 min value is outside of the allowed memory range", +}, +{ + "ringbuf: check passing rb mem to helpers", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + /* check whether the reservation was 
successful */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + /* pass allocated ring buffer memory to fib lookup */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 8), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_fib_lookup), + /* submit the ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 2 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 6c907144311f..8cfc5349d2a8 100644 --- a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -59,6 +59,34 @@ .result_unpriv = ACCEPT, }, { + "check with invalid reg offset 0", + .insns = { + /* reserve 8 byte ringbuf memory */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* store a pointer to the reserved memory in R6 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* add invalid offset to memory or NULL */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + /* check whether the reservation was successful */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + /* should not be able to access *(R7) = 0 */ + BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0), + /* submit the reserved ringbuf memory */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 1 }, + .result = REJECT, + .errstr = "R0 pointer arithmetic on alloc_mem_or_null prohibited", +}, +{ "check corrupted 
spill/fill", .insns = { /* spill R1(ctx) into stack */ diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 027198768fad..b3afd43549fa 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -4,18 +4,35 @@ set -u set -e -# This script currently only works for x86_64, as -# it is based on the VM image used by the BPF CI which is -# x86_64. -QEMU_BINARY="${QEMU_BINARY:="qemu-system-x86_64"}" -X86_BZIMAGE="arch/x86/boot/bzImage" +# This script currently only works for x86_64 and s390x, as +# it is based on the VM image used by the BPF CI, which is +# available only for these architectures. +ARCH="$(uname -m)" +case "${ARCH}" in +s390x) + QEMU_BINARY=qemu-system-s390x + QEMU_CONSOLE="ttyS1" + QEMU_FLAGS=(-smp 2) + BZIMAGE="arch/s390/boot/compressed/vmlinux" + ;; +x86_64) + QEMU_BINARY=qemu-system-x86_64 + QEMU_CONSOLE="ttyS0,115200" + QEMU_FLAGS=(-cpu host -smp 8) + BZIMAGE="arch/x86/boot/bzImage" + ;; +*) + echo "Unsupported architecture" + exit 1 + ;; +esac DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/latest.config" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config" -INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX" +KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" +KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" LOG_FILE="${LOG_FILE_BASE}.log" @@ -85,7 +102,7 @@ newest_rootfs_version() { { for file in "${!URLS[@]}"; do - if [[ $file =~ 
^libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then + if [[ $file =~ ^"${ARCH}"/libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then echo "${BASH_REMATCH[1]}" fi done @@ -102,7 +119,7 @@ download_rootfs() exit 1 fi - download "libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | + download "${ARCH}/libbpf-vmtest-rootfs-$rootfsversion.tar.zst" | zstd -d | sudo tar -C "$dir" -x } @@ -224,13 +241,12 @@ EOF -nodefaults \ -display none \ -serial mon:stdio \ - -cpu host \ + "${qemu_flags[@]}" \ -enable-kvm \ - -smp 8 \ -m 4G \ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ -kernel "${kernel_bzimage}" \ - -append "root=/dev/vda rw console=ttyS0,115200" + -append "root=/dev/vda rw console=${QEMU_CONSOLE}" } copy_logs() @@ -282,7 +298,7 @@ main() local kernel_checkout=$(realpath "${script_dir}"/../../../../) # By default the script searches for the kernel in the checkout directory but # it also obeys environment variables O= and KBUILD_OUTPUT= - local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}" + local kernel_bzimage="${kernel_checkout}/${BZIMAGE}" local command="${DEFAULT_COMMAND}" local update_image="no" local exit_command="poweroff -f" @@ -337,13 +353,13 @@ main() if is_rel_path "${O}"; then O="$(realpath "${PWD}/${O}")" fi - kernel_bzimage="${O}/${X86_BZIMAGE}" + kernel_bzimage="${O}/${BZIMAGE}" make_command="${make_command} O=${O}" elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then if is_rel_path "${KBUILD_OUTPUT}"; then KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")" fi - kernel_bzimage="${KBUILD_OUTPUT}/${X86_BZIMAGE}" + kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}" make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}" fi diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c index f5ffba341c17..51c8224b4ccc 100644 --- a/tools/testing/selftests/bpf/xdp_redirect_multi.c +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -85,10 +85,7 @@ int main(int argc, char **argv) { int 
prog_fd, group_all, mac_map; struct bpf_program *ingress_prog, *egress_prog; - struct bpf_prog_load_attr prog_load_attr = { - .prog_type = BPF_PROG_TYPE_UNSPEC, - }; - int i, ret, opt, egress_prog_fd = 0; + int i, err, ret, opt, egress_prog_fd = 0; struct bpf_devmap_val devmap_val; bool attach_egress_prog = false; unsigned char mac_addr[6]; @@ -147,10 +144,14 @@ int main(int argc, char **argv) printf("\n"); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - prog_load_attr.file = filename; - - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + obj = bpf_object__open_file(filename, NULL); + err = libbpf_get_error(obj); + if (err) + goto err_out; + err = bpf_object__load(obj); + if (err) goto err_out; + prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL)); if (attach_egress_prog) group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index 30f12637f4e4..baa870a759a2 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -22,6 +22,7 @@ #include "bpf/libbpf.h" #include "xdping.h" +#include "testing_helpers.h" static int ifindex; static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -173,7 +174,7 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { + if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) { fprintf(stderr, "load of %s failed\n", filename); return 1; } diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 6c7cf8aadc79..0a5d23da486d 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -100,6 +100,12 @@ #include "xdpxceiver.h" #include "../kselftest.h" +/* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. 
+ * Until xdpxceiver is either moved or rewritten into libxdp, suppress + * deprecation warnings in this file + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; static const char *IP1 = "192.168.100.162"; @@ -744,7 +750,6 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); struct xsk_umem_info *umem = xsk->umem; u32 idx_rx = 0, idx_fq = 0, rcvd, i; - u32 total = 0; int ret; while (pkt) { @@ -799,7 +804,6 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * pthread_mutex_lock(&pacing_mutex); pkts_in_flight -= rcvd; - total += rcvd; if (pkts_in_flight < umem->num_frames) pthread_cond_signal(&pacing_cond); pthread_mutex_unlock(&pacing_mutex); @@ -1219,7 +1223,7 @@ static bool hugepages_present(struct ifobject *ifobject) void *bufs; bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (bufs == MAP_FAILED) return false; @@ -1366,6 +1370,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC: + if (!hugepages_present(test->ifobj_tx)) { + ksft_test_result_skip("No 2M huge pages present.\n"); + return; + } test_spec_set_name(test, "UNALIGNED_INV_DESC"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 59e222460581..745fe25fa0b9 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -11,10 +11,12 @@ TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill +LOCAL_HDRS += 
$(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h + include ../lib.mk -$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h -$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h -$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h -$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h -$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h +$(OUTPUT)/test_memcontrol: cgroup_util.c +$(OUTPUT)/test_kmem: cgroup_util.c +$(OUTPUT)/test_core: cgroup_util.c +$(OUTPUT)/test_freezer: cgroup_util.c +$(OUTPUT)/test_kill: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 82e59cdf16e7..4f66d10626d2 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -2,9 +2,9 @@ #include <stdbool.h> #include <stdlib.h> -#define PAGE_SIZE 4096 +#include "../kselftest.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define PAGE_SIZE 4096 #define MB(x) (x << 20) diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index 42be3b925830..076cf4325f78 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -52,6 +52,12 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) size = sizeof(struct __clone_args); switch (test_mode) { + case CLONE3_ARGS_NO_TEST: + /* + * Uses default 'flags' and 'SIGCHLD' + * assignment. 
+ */ + break; case CLONE3_ARGS_ALL_0: args.flags = 0; args.exit_signal = 0; diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index aa7d13d91963..749239930ca8 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -50,10 +50,6 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd, return syscall(__NR_close_range, fd, max_fd, flags); } -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - TEST(core_close_range) { int i, ret; diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index 10e0f3dbc930..5f6eb965cfd1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -217,9 +217,11 @@ short_payload_get() dest_mac=$(mac_get $h1) p=$(: )"08:"$( : VXLAN flags - )"01:00:00:"$( : VXLAN reserved + )"00:00:00:"$( : VXLAN reserved )"00:03:e8:"$( : VXLAN VNI : 1000 )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr ) echo $p } @@ -263,7 +265,8 @@ decap_error_test() corrupted_packet_test "Decap error: Reserved bits in use" \ "reserved_bits_payload_get" - corrupted_packet_test "Decap error: No L2 header" "short_payload_get" + corrupted_packet_test "Decap error: Too short inner packet" \ + "short_payload_get" } mc_smac_payload_get() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh new file mode 100755 index 000000000000..f6c16cbb6cf7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh @@ -0,0 +1,342 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel drops and exceptions 
functionality over mlxsw. +# Check all traps to make sure they are triggered under the right +# conditions. + +# +------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|-------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRF2 | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test + overlay_smac_is_mc_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. 
+ ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx1 master br1 + ip link set dev vx1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:3::1/64 +} + +switch_destroy() +{ + ip address del dev $rp1 2001:db8:3::1/64 + ip link set dev $rp1 down + + ip link set dev vx1 down + ip link set dev vx1 nomaster + ip link del dev vx1 + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link del dev br1 +} + +vrf2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + forwarding_restore + vrf_cleanup +} + +ecn_payload_get() +{ + local dest_mac=$(mac_get $h1) + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local desc=$1; shift + 
local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp \ + sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +reserved_bits_payload_get() +{ + local dest_mac=$(mac_get $h1) + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +short_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + ) + echo $p +} + +corrupted_packet_test() +{ + local trap_name="decap_error" + local desc=$1; shift + local payload_get=$1; shift + local mz_pid + + RET=0 + + # In case of too short packet, there is no any inner packet, + # so the matching will always succeed + tc filter add dev 
$swp1 egress protocol ipv6 pref 1 handle 101 \ + flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \ + action pass + + rp1_mac=$(mac_get $rp1) + payload=$($payload_get) + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower +} + +decap_error_test() +{ + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + corrupted_packet_test "Decap error: Reserved bits in use" \ + "reserved_bits_payload_get" + corrupted_packet_test "Decap error: Too short inner packet" \ + "short_payload_get" +} + +mc_smac_payload_get() +{ + local dest_mac=$(mac_get $h1) + local source_mac="01:02:03:04:05:06" + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"0:0"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( : IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) + echo $p +} + +overlay_smac_is_mc_test() +{ + local trap_name="overlay_smac_is_mc" + local mz_pid + + RET=0 + + # The matching will be checked on devlink_trap_drop_test() + # and the filter will be removed on devlink_trap_drop_cleanup() + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_mac 01:02:03:04:05:06 action 
pass + + rp1_mac=$(mac_get $rp1) + payload=$(mc_smac_payload_get) + + ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $swp1 101 + + log_test "Overlay source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh new file mode 100755 index 000000000000..429f7ee735cf --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh @@ -0,0 +1,322 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to four IPv6 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. 
+# +# +-----------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 2001:db8:2::1 | | +# | | remote 2001:db8:2::{2..17} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:2::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning \ + udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local 2001:db8:2::1 dstport 4789 + + ip address add 2001:db8:2::1/128 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 2001:db8:2::1/128 dev lo + + ip link 
del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 2001:db8:3::1/64 dev $rp1 + ip route add 2001:db8:2::0/64 via 2001:db8:3::2 +} + +router1_destroy() +{ + ip route del 2001:db8:2::0/64 via 2001:db8:3::2 + ip address del 2001:db8:3::1/64 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 2001:db8:3::2/64 +} + +router2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 2001:db8:2::$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \ + flower ip_proto udp dst_ip 2001:db8:2::$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ipv6 pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? 
"remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 16 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=16 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 10..13 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + 
# Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + RET=0 + + packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 8 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh new file mode 100755 index 
000000000000..d8fd875ad527 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh @@ -0,0 +1,334 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to five IPv6 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +-----------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 2001:db8:1::1/64 | +# +----|------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 2001:db8:2::1 | | +# | | remote 2001:db8:2::{2..21} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:2::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning \ + udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit 
\ + local 2001:db8:2::1 dstport 4789 + + ip address add 2001:db8:2::1/128 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 2001:db8:2::1/128 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 2001:db8:3::1/64 dev $rp1 + ip route add 2001:db8:2::0/64 via 2001:db8:3::2 +} + +router1_destroy() +{ + ip route del 2001:db8:2::0/64 via 2001:db8:3::2 + ip address del 2001:db8:3::1/64 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 2001:db8:3::2/64 +} + +router2_destroy() +{ + simple_if_fini $rp2 2001:db8:3::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 2001:db8:2::$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \ + flower ip_proto udp dst_ip 2001:db8:2::$lsb \ + dst_port 4789 skip_sw 
action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ipv6 pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 20 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=20 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 12..16 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + 
log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 
dev vxlan0 self dst 2001:db8:2::9 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 8 packets" + + RET=0 + + packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11 + + $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 9 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 729a86cc4ede..99a332b712f0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -4,10 +4,35 @@ # Test various aspects of VxLAN offloading which are specific to mlxsw, such # as sanitization of invalid configurations and offload indication. 
-lib_dir=$(dirname $0)/../../../net/forwarding +: ${ADDR_FAMILY:=ipv4} +export ADDR_FAMILY + +: ${LOCAL_IP_1:=198.51.100.1} +export LOCAL_IP_1 + +: ${LOCAL_IP_2:=198.51.100.2} +export LOCAL_IP_2 + +: ${PREFIX_LEN:=32} +export PREFIX_LEN + +: ${UDPCSUM_FLAFS:=noudpcsum} +export UDPCSUM_FLAFS + +: ${MC_IP:=239.0.0.1} +export MC_IP -ALL_TESTS="sanitization_test offload_indication_test \ - sanitization_vlan_aware_test offload_indication_vlan_aware_test" +: ${IP_FLAG:=""} +export IP_FLAG + +: ${ALL_TESTS:=" + sanitization_test + offload_indication_test + sanitization_vlan_aware_test + offload_indication_vlan_aware_test +"} + +lib_dir=$(dirname $0)/../../../net/forwarding NUM_NETIFS=2 : ${TIMEOUT:=20000} # ms source $lib_dir/lib.sh @@ -63,8 +88,8 @@ sanitization_single_dev_valid_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_pass @@ -80,8 +105,8 @@ sanitization_single_dev_vlan_aware_test() ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_pass @@ -97,8 +122,8 @@ sanitization_single_dev_mcast_enabled_test() ip link add dev br0 type bridge - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -115,9 +140,9 @@ sanitization_single_dev_mcast_group_test() ip link add dev br0 type 
bridge mcast_snooping 0 ip link add name dummy1 up type dummy - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ - dev dummy1 group 239.0.0.1 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ + dev dummy1 group $MC_IP sanitization_single_dev_test_fail @@ -134,7 +159,7 @@ sanitization_single_dev_no_local_ip_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit dstport 4789 sanitization_single_dev_test_fail @@ -145,31 +170,14 @@ sanitization_single_dev_no_local_ip_test() log_test "vxlan device with no local ip" } -sanitization_single_dev_local_ipv6_test() -{ - RET=0 - - ip link add dev br0 type bridge mcast_snooping 0 - - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 2001:db8::1 dstport 4789 - - sanitization_single_dev_test_fail - - ip link del dev vxlan0 - ip link del dev br0 - - log_test "vxlan device with local ipv6 address" -} - -sanitization_single_dev_learning_enabled_test() +sanitization_single_dev_learning_enabled_ipv4_test() { RET=0 ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_pass @@ -186,8 +194,8 @@ sanitization_single_dev_local_interface_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add name dummy1 up type dummy - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1 + ip link add name vxlan0 up type vxlan id 10 nolearning 
$UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1 sanitization_single_dev_test_fail @@ -204,8 +212,8 @@ sanitization_single_dev_port_range_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \ srcport 4000 5000 sanitization_single_dev_test_fail @@ -222,8 +230,8 @@ sanitization_single_dev_tos_static_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos 20 local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -239,8 +247,8 @@ sanitization_single_dev_ttl_inherit_test() ip link add dev br0 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl inherit tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -250,14 +258,14 @@ sanitization_single_dev_ttl_inherit_test() log_test "vxlan device with inherit ttl" } -sanitization_single_dev_udp_checksum_test() +sanitization_single_dev_udp_checksum_ipv4_test() { RET=0 ip link add dev br0 type bridge mcast_snooping 0 ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_single_dev_test_fail @@ -276,13 +284,12 @@ sanitization_single_dev_test() sanitization_single_dev_mcast_enabled_test sanitization_single_dev_mcast_group_test sanitization_single_dev_no_local_ip_test - sanitization_single_dev_local_ipv6_test 
- sanitization_single_dev_learning_enabled_test + sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test sanitization_single_dev_local_interface_test sanitization_single_dev_port_range_test sanitization_single_dev_tos_static_test sanitization_single_dev_ttl_inherit_test - sanitization_single_dev_udp_checksum_test + sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test } sanitization_multi_devs_test_pass() @@ -334,10 +341,10 @@ sanitization_multi_devs_valid_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_multi_devs_test_pass @@ -356,10 +363,10 @@ sanitization_multi_devs_ttl_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 40 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789 sanitization_multi_devs_test_fail @@ -378,10 +385,10 @@ sanitization_multi_devs_udp_dstport_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 
nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 5789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789 sanitization_multi_devs_test_fail @@ -400,10 +407,10 @@ sanitization_multi_devs_local_ip_test() ip link add dev br0 type bridge mcast_snooping 0 ip link add dev br1 type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 - ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.2 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789 sanitization_multi_devs_test_fail @@ -443,12 +450,12 @@ offload_indication_setup_create() ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 } offload_indication_setup_destroy() @@ -456,7 +463,7 @@ offload_indication_setup_destroy() ip link del dev vxlan1 ip link del dev vxlan0 - ip address del 198.51.100.1/32 dev lo + ip address del $LOCAL_IP_1/$PREFIX_LEN 
dev lo ip link set dev $swp2 nomaster ip link set dev $swp1 nomaster @@ -469,7 +476,7 @@ offload_indication_fdb_flood_test() { RET=0 - bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \ bridge fdb show brport vxlan0 @@ -485,7 +492,7 @@ offload_indication_fdb_bridge_test() RET=0 bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ - dst 198.51.100.2 + dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 @@ -536,7 +543,7 @@ offload_indication_fdb_bridge_test() # marked as offloaded in both drivers RET=0 - bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0 check_err $? @@ -560,17 +567,17 @@ offload_indication_decap_route_test() RET=0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev vxlan0 down busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev vxlan1 down busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vxlan device down" @@ -579,26 +586,26 @@ offload_indication_decap_route_test() ip link set dev vxlan1 up busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? 
ip link set dev vxlan0 up busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vxlan device up" RET=0 - ip address delete 198.51.100.1/32 dev lo + ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - add local route" @@ -607,18 +614,18 @@ offload_indication_decap_route_test() ip link set dev $swp1 nomaster busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev $swp2 nomaster busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - local ports enslavement" @@ -627,12 +634,12 @@ offload_indication_decap_route_test() ip link del dev br0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link del dev br1 busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? 
log_test "vxlan decap route - bridge device deletion" @@ -646,25 +653,25 @@ offload_indication_decap_route_test() ip link set dev vxlan0 master br0 ip link set dev vxlan1 master br1 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link del dev vxlan0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? ip link del dev vxlan1 busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? log_test "vxlan decap route - vxlan device deletion" ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 } check_fdb_offloaded() @@ -721,10 +728,10 @@ __offload_indication_join_vxlan_first() local mac=00:11:22:33:44:55 local zmac=00:00:00:00:00:00 - bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 ip link set dev vxlan0 master br0 - bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 + bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2 RET=0 check_vxlan_fdb_not_offloaded @@ -774,8 +781,8 @@ __offload_indication_join_vxlan_first() offload_indication_join_vxlan_first() { ip link add dev br0 up type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 
__offload_indication_join_vxlan_first @@ -789,7 +796,7 @@ __offload_indication_join_vxlan_last() RET=0 - bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 ip link set dev $swp1 master br0 @@ -809,8 +816,8 @@ __offload_indication_join_vxlan_last() offload_indication_join_vxlan_last() { ip link add dev br0 up type bridge mcast_snooping 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_last @@ -837,10 +844,10 @@ sanitization_vlan_aware_test() ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 # Test that when each VNI is mapped to a different VLAN we can enslave # a port to the bridge @@ -884,20 +891,20 @@ sanitization_vlan_aware_test() # Use the offload indication of the local route to ensure the VXLAN # configuration was correctly rollbacked. - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo ip link set dev vxlan10 type vxlan ttl 10 ip link set dev $swp1 master br0 &> /dev/null check_fail $? busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? 
log_test "vlan-aware - failed enslavement to bridge due to conflict" ip link set dev vxlan10 type vxlan ttl 20 - ip address del 198.51.100.1/32 dev lo + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo ip link del dev vxlan20 ip link del dev vxlan10 @@ -916,12 +923,12 @@ offload_indication_vlan_aware_setup_create() bridge vlan add vid 10 dev $swp1 bridge vlan add vid 20 dev $swp1 - ip address add 198.51.100.1/32 dev lo + ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ - noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged @@ -935,7 +942,7 @@ offload_indication_vlan_aware_setup_destroy() ip link del dev vxlan20 ip link del dev vxlan10 - ip address del 198.51.100.1/32 dev lo + ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo bridge vlan del vid 20 dev $swp1 bridge vlan del vid 10 dev $swp1 @@ -952,7 +959,7 @@ offload_indication_vlan_aware_fdb_test() log_info "vxlan entry offload indication - vlan-aware" bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ - dst 198.51.100.2 vlan 10 + dst $LOCAL_IP_2 vlan 10 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 @@ -1003,7 +1010,7 @@ offload_indication_vlan_aware_fdb_test() # marked as offloaded in both drivers RET=0 - bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2 busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10 check_err $? 
@@ -1021,7 +1028,7 @@ offload_indication_vlan_aware_decap_route_test() RET=0 busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? # Toggle PVID flag on one VxLAN device and make sure route is still @@ -1029,7 +1036,7 @@ offload_indication_vlan_aware_decap_route_test() bridge vlan add vid 10 dev vxlan10 untagged busywait "$TIMEOUT" wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? # Toggle PVID flag on second VxLAN device and make sure route is no @@ -1037,14 +1044,15 @@ offload_indication_vlan_aware_decap_route_test() bridge vlan add vid 20 dev vxlan20 untagged busywait "$TIMEOUT" not wait_for_offload \ - ip route show table local 198.51.100.1 + ip $IP_FLAG route show table local $LOCAL_IP_1 check_err $? # Toggle PVID flag back and make sure route is marked as offloaded bridge vlan add vid 10 dev vxlan10 pvid untagged bridge vlan add vid 20 dev vxlan20 pvid untagged - busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1 + busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \ + $LOCAL_IP_1 check_err $? 
log_test "vxlan decap route - vni map/unmap" @@ -1054,8 +1062,8 @@ offload_indication_vlan_aware_join_vxlan_first() { ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_first 1 @@ -1067,8 +1075,8 @@ offload_indication_vlan_aware_join_vxlan_last() { ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 __offload_indication_join_vxlan_last @@ -1085,14 +1093,14 @@ offload_indication_vlan_aware_l3vni_test() sysctl_set net.ipv6.conf.default.disable_ipv6 1 ip link add dev br0 up type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 0 - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 ip link set dev $swp1 master br0 # The test will use the offload indication on the FDB entry to # understand if the tunnel is offloaded or not - bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1 + bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2 ip link set dev vxlan0 master br0 bridge vlan add dev vxlan0 vid 10 pvid untagged diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh index 749ba3cfda1d..38148f51877a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh +++ 
b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh @@ -4,6 +4,21 @@ # Test vetoing of FDB entries that mlxsw can not offload. This exercises several # different veto vectors to test various rollback scenarios in the vxlan driver. +: ${LOCAL_IP:=198.51.100.1} +export LOCAL_IP + +: ${REMOTE_IP_1:=198.51.100.2} +export REMOTE_IP_1 + +: ${REMOTE_IP_2:=198.51.100.3} +export REMOTE_IP_2 + +: ${UDPCSUM_FLAFS:=noudpcsum} +export UDPCSUM_FLAFS + +: ${MC_IP:=224.0.0.1} +export MC_IP + lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" @@ -26,8 +41,8 @@ setup_prepare() ip link set dev $swp1 master br0 ip link set dev $swp2 up - ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP dstport 4789 ip link set dev vxlan0 master br0 } @@ -50,11 +65,11 @@ fdb_create_veto_test() RET=0 bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ - dst 198.51.100.2 2>/dev/null + dst $REMOTE_IP_1 2>/dev/null check_fail $? "multicast MAC not rejected" bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \ - dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum + dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum check_err $? "multicast MAC rejected without extack" log_test "vxlan FDB veto - create" @@ -65,15 +80,15 @@ fdb_replace_veto_test() RET=0 bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \ - dst 198.51.100.2 + dst $REMOTE_IP_1 check_err $? "valid FDB rejected" bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ - dst 198.51.100.2 port 1234 2>/dev/null + dst $REMOTE_IP_1 port 1234 2>/dev/null check_fail $? "FDB with an explicit port not rejected" bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \ - dst 198.51.100.2 port 1234 2>&1 >/dev/null \ + dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? 
"FDB with an explicit port rejected without extack" @@ -85,15 +100,15 @@ fdb_append_veto_test() RET=0 bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \ - dst 198.51.100.2 + dst $REMOTE_IP_1 check_err $? "valid FDB rejected" bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ - dst 198.51.100.3 port 1234 2>/dev/null + dst $REMOTE_IP_2 port 1234 2>/dev/null check_fail $? "FDB with an explicit port not rejected" bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \ - dst 198.51.100.3 port 1234 2>&1 >/dev/null \ + dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with an explicit port rejected without extack" @@ -105,11 +120,11 @@ fdb_changelink_veto_test() RET=0 ip link set dev vxlan0 type vxlan \ - group 224.0.0.1 dev lo 2>/dev/null + group $MC_IP dev lo 2>/dev/null check_fail $? "FDB with a multicast IP not rejected" ip link set dev vxlan0 type vxlan \ - group 224.0.0.1 dev lo 2>&1 >/dev/null \ + group $MC_IP dev lo 2>&1 >/dev/null \ | grep -q mlxsw_spectrum check_err $? "FDB with a multicast IP rejected without extack" diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh new file mode 100755 index 000000000000..66c87aab86f6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN test for IPv6. 
+ +LOCAL_IP=2001:db8:1::1 +REMOTE_IP_1=2001:db8:2::1 +REMOTE_IP_2=2001:db8:3::1 +UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx" +MC_IP=FF02::2 + +source vxlan_fdb_veto.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh new file mode 100755 index 000000000000..f2ea0163ddea --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN test for IPv6. + +ADDR_FAMILY=ipv6 +LOCAL_IP_1=2001:db8:1::1 +LOCAL_IP_2=2001:db8:1::2 +PREFIX_LEN=128 +UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx" +MC_IP=FF02::2 +IP_FLAG="-6" + +ALL_TESTS=" + sanitization_test + offload_indication_test + sanitization_vlan_aware_test + offload_indication_vlan_aware_test +" + +sanitization_single_dev_learning_enabled_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \ + ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_udp_checksum_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning \ + noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \ + local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + log_test "vxlan device without zero udp checksum at RX" + + ip link del dev vxlan0 + + ip link add name vxlan0 up type vxlan id 10 nolearning \ + udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \ + local $LOCAL_IP_1 dstport 4789 + + sanitization_single_dev_test_fail + log_test "vxlan device without zero udp checksum at TX" + + ip link del dev vxlan0 + ip link del dev br0 + +} + +source vxlan.sh diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc 
b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index 98166fa3eb91..34fb89b0c61f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -1,6 +1,6 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -# description: Kprobe dynamic event - adding and removing +# description: Kprobe profile # requires: kprobe_events ! grep -q 'myevent' kprobe_profile diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore index a4969f7ee020..ededb077a3a6 100644 --- a/tools/testing/selftests/gpio/.gitignore +++ b/tools/testing/selftests/gpio/.gitignore @@ -1,2 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only gpio-mockup-cdev +gpio-chip-info +gpio-line-name diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index d7b312b44a62..71b306602368 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := gpio-mockup.sh +TEST_PROGS := gpio-mockup.sh gpio-sim.sh TEST_FILES := gpio-mockup-sysfs.sh -TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev +TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name CFLAGS += -O2 -g -Wall -I../../../../usr/include/ include ../lib.mk diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config index ce100342c20b..409a8532facc 100644 --- a/tools/testing/selftests/gpio/config +++ b/tools/testing/selftests/gpio/config @@ -1,3 +1,4 @@ CONFIG_GPIOLIB=y CONFIG_GPIO_CDEV=y CONFIG_GPIO_MOCKUP=m +CONFIG_GPIO_SIM=m diff --git a/tools/testing/selftests/gpio/gpio-chip-info.c b/tools/testing/selftests/gpio/gpio-chip-info.c new file mode 100644 index 000000000000..fdc07e742fba --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-chip-info.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GPIO character device helper for reading chip information. 
+ * + * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl> + */ + +#include <fcntl.h> +#include <linux/gpio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> + +static void print_usage(void) +{ + printf("usage:\n"); + printf(" gpio-chip-info <chip path> [name|label|num-lines]\n"); +} + +int main(int argc, char **argv) +{ + struct gpiochip_info info; + int fd, ret; + + if (argc != 3) { + print_usage(); + return EXIT_FAILURE; + } + + fd = open(argv[1], O_RDWR); + if (fd < 0) { + perror("unable to open the GPIO chip"); + return EXIT_FAILURE; + } + + memset(&info, 0, sizeof(info)); + ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &info); + if (ret) { + perror("chip info ioctl failed"); + return EXIT_FAILURE; + } + + if (strcmp(argv[2], "name") == 0) { + printf("%s\n", info.name); + } else if (strcmp(argv[2], "label") == 0) { + printf("%s\n", info.label); + } else if (strcmp(argv[2], "num-lines") == 0) { + printf("%u\n", info.lines); + } else { + fprintf(stderr, "unknown command: %s\n", argv[2]); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/gpio/gpio-line-name.c b/tools/testing/selftests/gpio/gpio-line-name.c new file mode 100644 index 000000000000..e635cfadbded --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-line-name.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * GPIO character device helper for reading line names. 
+ * + * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl> + */ + +#include <fcntl.h> +#include <linux/gpio.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> + +static void print_usage(void) +{ + printf("usage:\n"); + printf(" gpio-line-name <chip path> <line offset>\n"); +} + +int main(int argc, char **argv) +{ + struct gpio_v2_line_info info; + int fd, ret; + char *endp; + + if (argc != 3) { + print_usage(); + return EXIT_FAILURE; + } + + fd = open(argv[1], O_RDWR); + if (fd < 0) { + perror("unable to open the GPIO chip"); + return EXIT_FAILURE; + } + + memset(&info, 0, sizeof(info)); + info.offset = strtoul(argv[2], &endp, 10); + if (*endp != '\0') { + print_usage(); + return EXIT_FAILURE; + } + + ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &info); + if (ret) { + perror("line info ioctl failed"); + return EXIT_FAILURE; + } + + printf("%s\n", info.name); + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh new file mode 100755 index 000000000000..341e3de00896 --- /dev/null +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -0,0 +1,396 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl> + +BASE_DIR=`dirname $0` +CONFIGFS_DIR="/sys/kernel/config/gpio-sim" +MODULE="gpio-sim" + +fail() { + echo "$*" >&2 + echo "GPIO $MODULE test FAIL" + exit 1 +} + +skip() { + echo "$*" >&2 + echo "GPIO $MODULE test SKIP" + exit 4 +} + +remove_chip() { + local CHIP=$1 + + for FILE in $CONFIGFS_DIR/$CHIP/*; do + BANK=`basename $FILE` + if [ "$BANK" = "live" -o "$BANK" = "dev_name" ]; then + continue + fi + + LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | egrep ^line` + if [ "$?" 
= 0 ]; then + for LINE in $LINES; do + if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then + rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog || \ + fail "Unable to remove the hog" + fi + + rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE || \ + fail "Unable to remove the line" + done + fi + + rmdir $CONFIGFS_DIR/$CHIP/$BANK + done + + rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip" +} + +configfs_cleanup() { + for CHIP in `ls $CONFIGFS_DIR/`; do + remove_chip $CHIP + done +} + +create_chip() { + local CHIP=$1 + + mkdir $CONFIGFS_DIR/$CHIP +} + +create_bank() { + local CHIP=$1 + local BANK=$2 + + mkdir $CONFIGFS_DIR/$CHIP/$BANK +} + +set_label() { + local CHIP=$1 + local BANK=$2 + local LABEL=$3 + + echo $LABEL > $CONFIGFS_DIR/$CHIP/$BANK/label || fail "Unable to set the chip label" +} + +set_num_lines() { + local CHIP=$1 + local BANK=$2 + local NUM_LINES=$3 + + echo $NUM_LINES > $CONFIGFS_DIR/$CHIP/$BANK/num_lines || \ + fail "Unable to set the number of lines" +} + +set_line_name() { + local CHIP=$1 + local BANK=$2 + local OFFSET=$3 + local NAME=$4 + local LINE_DIR=$CONFIGFS_DIR/$CHIP/$BANK/line$OFFSET + + test -d $LINE_DIR || mkdir $LINE_DIR + echo $NAME > $LINE_DIR/name || fail "Unable to set the line name" +} + +enable_chip() { + local CHIP=$1 + + echo 1 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to enable the chip" +} + +disable_chip() { + local CHIP=$1 + + echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip" +} + +configfs_chip_name() { + local CHIP=$1 + local BANK=$2 + + cat $CONFIGFS_DIR/$CHIP/$BANK/chip_name 2> /dev/null || \ + fail "unable to read the chip name from configfs" +} + +configfs_dev_name() { + local CHIP=$1 + + cat $CONFIGFS_DIR/$CHIP/dev_name 2> /dev/null || \ + fail "unable to read the device name from configfs" +} + +get_chip_num_lines() { + local CHIP=$1 + local BANK=$2 + + $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` num-lines || \ + fail "unable to read the number of lines from the character device" +} + 
+get_chip_label() { + local CHIP=$1 + local BANK=$2 + + $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` label || \ + fail "unable to read the chip label from the character device" +} + +get_line_name() { + local CHIP=$1 + local BANK=$2 + local OFFSET=$3 + + $BASE_DIR/gpio-line-name /dev/`configfs_chip_name $CHIP $BANK` $OFFSET || \ + fail "unable to read the line name from the character device" +} + +sysfs_set_pull() { + local DEV=$1 + local BANK=$2 + local OFFSET=$3 + local PULL=$4 + local DEVNAME=`configfs_dev_name $DEV` + local CHIPNAME=`configfs_chip_name $DEV $BANK` + local SYSFSPATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull" + + echo $PULL > $SYSFSPATH || fail "Unable to set line pull in sysfs" +} + +# Load the gpio-sim module. This will pull in configfs if needed too. +modprobe gpio-sim || skip "unable to load the gpio-sim module" +# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed. +for IDX in `seq 5`; do + if [ "$IDX" -eq "5" ]; then + skip "configfs not mounted at /sys/kernel/config" + fi + + mountpoint -q /sys/kernel/config && break + sleep 0.1 +done +# If the module was already loaded: remove all previous chips +configfs_cleanup + +trap "exit 1" SIGTERM SIGINT +trap configfs_cleanup EXIT + +echo "1. chip_name and dev_name attributes" + +echo "1.1. Chip name is communicated to user" +create_chip chip +create_bank chip bank +enable_chip chip +test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work" +remove_chip chip + +echo "1.2. chip_name returns 'none' if the chip is still pending" +create_chip chip +create_bank chip bank +test "`cat $CONFIGFS_DIR/chip/bank/chip_name`" = "none" || \ + fail "chip_name doesn't return 'none' for a pending chip" +remove_chip chip + +echo "1.3. Device name is communicated to user" +create_chip chip +create_bank chip bank +enable_chip chip +test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work" +remove_chip chip + +echo "2. 
Creating and configuring simulated chips" + +echo "2.1. Default number of lines is 1" +create_chip chip +create_bank chip bank +enable_chip chip +test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1" +remove_chip chip + +echo "2.2. Number of lines can be specified" +create_chip chip +create_bank chip bank +set_num_lines chip bank 16 +enable_chip chip +test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16" +remove_chip chip + +echo "2.3. Label can be set" +create_chip chip +create_bank chip bank +set_label chip bank foobar +enable_chip chip +test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect" +remove_chip chip + +echo "2.4. Label can be left empty" +create_chip chip +create_bank chip bank +enable_chip chip +test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty" +remove_chip chip + +echo "2.5. Line names can be configured" +create_chip chip +create_bank chip bank +set_num_lines chip bank 16 +set_line_name chip bank 0 foo +set_line_name chip bank 2 bar +enable_chip chip +test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect" +test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect" +remove_chip chip + +echo "2.6. Line config can remain unused if offset is greater than number of lines" +create_chip chip +create_bank chip bank +set_num_lines chip bank 2 +set_line_name chip bank 5 foobar +enable_chip chip +test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect" +test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect" +remove_chip chip + +echo "2.7. Line configfs directory names are sanitized" +create_chip chip +create_bank chip bank +mkdir $CONFIGFS_DIR/chip/bank/line12foobar 2> /dev/null && \ + fail "invalid configfs line name accepted" +mkdir $CONFIGFS_DIR/chip/bank/line_no_offset 2> /dev/null && \ + fail "invalid configfs line name accepted" +remove_chip chip + +echo "2.8. 
Multiple chips can be created" +CHIPS="chip0 chip1 chip2" +for CHIP in $CHIPS; do + create_chip $CHIP + create_bank $CHIP bank + enable_chip $CHIP +done +for CHIP in $CHIPS; do + remove_chip $CHIP +done + +echo "2.9. Can't modify settings when chip is live" +create_chip chip +create_bank chip bank +enable_chip chip +echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \ + fail "Setting label of a live chip should fail" +echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \ + fail "Setting number of lines of a live chip should fail" +remove_chip chip + +echo "2.10. Can't create line items when chip is live" +create_chip chip +create_bank chip bank +enable_chip chip +mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail" +remove_chip chip + +echo "2.11. Probe errors are propagated to user-space" +create_chip chip +create_bank chip bank +set_num_lines chip bank 99999 +echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Probe error was not propagated" +remove_chip chip + +echo "2.12. Cannot enable a chip without any GPIO banks" +create_chip chip +echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Chip enabled without any GPIO banks" +remove_chip chip + +echo "2.13. Duplicate chip labels are not allowed" +create_chip chip +create_bank chip bank0 +set_label chip bank0 foobar +create_bank chip bank1 +set_label chip bank1 foobar +echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Duplicate chip labels were not rejected" +remove_chip chip + +echo "2.14. Lines can be hogged" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog +enable_chip chip +$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \ + fail "Setting the value of a hogged line shouldn't succeed" +remove_chip chip + +echo "3. Controlling simulated chips" + +echo "3.1. 
Pull can be set over sysfs" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +sysfs_set_pull chip bank 0 pull-up +$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 0 +test "$?" = "1" || fail "pull set incorrectly" +sysfs_set_pull chip bank 0 pull-down +# Read back line 0, the line whose pull was just changed; reading another line would not verify the pull-down. +$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 0 +test "$?" = "0" || fail "pull set incorrectly" +remove_chip chip + +echo "3.2. Pull can be read from sysfs" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull +test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed" +sysfs_set_pull chip bank 0 pull-up +test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed" +remove_chip chip + +echo "3.3. Incorrect input in sysfs is rejected" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull" +echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected" +remove_chip chip + +echo "3.4. Can't write to value" +create_chip chip +create_bank chip bank +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" +echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly" +remove_chip chip + +echo "4. Simulated GPIO chips are functional" + +echo "4.1.
Values can be read from sysfs" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" +test `cat $SYSFS_PATH` = "0" || fail "incorrect value read from sysfs" +$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 & +sleep 0.1 # FIXME Any better way? +test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs" +kill $! +remove_chip chip + +echo "4.2. Bias settings work correctly" +create_chip chip +create_bank chip bank +set_num_lines chip bank 8 +enable_chip chip +# Recompute the sysfs path for this chip instead of reusing the value left over from test 4.1, whose chip was removed. +DEVNAME=`configfs_dev_name chip` +CHIPNAME=`configfs_chip_name chip bank` +SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value" +$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0 +test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work" +remove_chip chip + +echo "GPIO $MODULE test PASS" diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c index af7f9c7d59bc..06256c96df12 100644 --- a/tools/testing/selftests/ir/ir_loopback.c +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -26,7 +26,6 @@ #include "../kselftest.h" #define TEST_SCANCODES 10 -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define SYSFS_PATH_MAX 256 #define DNAME_PATH_MAX 256 diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index aa91d2063249..806a150648c3 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -4,7 +4,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -ifeq ($(ARCH),x86) +ifeq ($(ARCH),$(filter $(ARCH),x86 ppc64le)) TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh TEST_FILES := kexec_common_lib.sh diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh index 43017cfe88f7..0e114b34d5d7 100755 ---
a/tools/testing/selftests/kexec/kexec_common_lib.sh +++ b/tools/testing/selftests/kexec/kexec_common_lib.sh @@ -91,6 +91,27 @@ get_efi_var_secureboot_mode() return 0; } +# On powerpc platform, check device-tree property +# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing +# to detect secureboot state. +get_ppc64_secureboot_mode() +{ + local secure_boot_file="/proc/device-tree/ibm,secureboot/os-secureboot-enforcing" + # Check for secure boot file existence + if [ -f $secure_boot_file ]; then + log_info "Secureboot is enabled (Device tree)" + return 1; + fi + log_info "Secureboot is not enabled (Device tree)" + return 0; +} + +# Return the architecture of the system +get_arch() +{ + echo $(arch) +} + # Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID). # The secure boot mode can be accessed either as the last integer # of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from @@ -100,14 +121,19 @@ get_efi_var_secureboot_mode() get_secureboot_mode() { local secureboot_mode=0 + local system_arch=$(get_arch) - get_efivarfs_secureboot_mode - secureboot_mode=$? - - # fallback to using the efi_var files - if [ $secureboot_mode -eq 0 ]; then - get_efi_var_secureboot_mode + if [ "$system_arch" == "ppc64le" ]; then + get_ppc64_secureboot_mode + secureboot_mode=$? + else + get_efivarfs_secureboot_mode secureboot_mode=$? + # fallback to using the efi_var files + if [ $secureboot_mode -eq 0 ]; then + get_efi_var_secureboot_mode + secureboot_mode=$? + fi fi if [ $secureboot_mode -eq 0 ]; then @@ -138,15 +164,20 @@ kconfig_enabled() return 0 } -# Attempt to get the kernel config first via proc, and then by -# extracting it from the kernel image or the configs.ko using -# scripts/extract-ikconfig. +# Attempt to get the kernel config first by checking the modules directory +# then via proc, and finally by extracting it from the kernel image or the +# configs.ko using scripts/extract-ikconfig. # Return 1 for found. 
get_kconfig() { local proc_config="/proc/config.gz" local module_dir="/lib/modules/`uname -r`" - local configs_module="$module_dir/kernel/kernel/configs.ko" + local configs_module="$module_dir/kernel/kernel/configs.ko*" + + if [ -f $module_dir/config ]; then + IKCONFIG=$module_dir/config + return 1 + fi if [ ! -f $proc_config ]; then modprobe configs > /dev/null 2>&1 diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh index 2ff600388c30..c9ccb3c93d72 100755 --- a/tools/testing/selftests/kexec/test_kexec_file_load.sh +++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh @@ -97,10 +97,11 @@ check_for_imasig() check_for_modsig() { local module_sig_string="~Module signature appended~" - local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)" local ret=0 - if [ "$sig" == "$module_sig_string" ]; then + tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE | \ + grep -q "$module_sig_string" + if [ $? -eq 0 ]; then ret=1 log_info "kexec kernel image modsig signed" else @@ -225,8 +226,12 @@ get_secureboot_mode secureboot=$? # Are there pe and ima signatures -check_for_pesig -pe_signed=$? +if [ "$(get_arch)" == 'ppc64le' ]; then + pe_signed=0 +else + check_for_pesig + pe_signed=$? +fi check_for_imasig ima_signed=$? 
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 8d50483fe204..f1180987492c 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -48,6 +48,10 @@ #include <stdarg.h> #include <stdio.h> +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + /* define kselftest exit codes */ #define KSFT_PASS 0 #define KSFT_FAIL 1 diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index ae0f0f33b2a6..471eaa7b3a3f 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -671,7 +671,9 @@ #define EXPECT_STRNE(expected, seen) \ __EXPECT_STR(expected, seen, !=, 0) +#ifndef ARRAY_SIZE #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif /* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is * not thread-safe, but it should be fine in most sane test scenarios. @@ -969,7 +971,7 @@ void __run_test(struct __fixture_metadata *f, t->passed = 1; t->skip = 0; t->trigger = 0; - t->step = 0; + t->step = 1; t->no_print = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index 20e2a9286d71..183b7e8e1b95 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -17,10 +17,6 @@ #include "../kselftest_harness.h" -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - /* * TEST_F_FORK() is useful when a test drop privileges but the corresponding * FIXTURE_TEARDOWN() requires them (e.g. 
to remove files from a directory diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index fe7ee2b0f29c..a40add31a2e3 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -141,7 +141,7 @@ endif # Selftest makefiles can override those targets by setting # OVERRIDE_TARGETS = 1. ifeq ($(OVERRIDE_TARGETS),) -LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h +LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh index 1b4d95d575f8..14fedeef762e 100755 --- a/tools/testing/selftests/lkdtm/stack-entropy.sh +++ b/tools/testing/selftests/lkdtm/stack-entropy.sh @@ -4,13 +4,27 @@ # Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test. set -e samples="${1:-1000}" +TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT +KSELFTEST_SKIP_TEST=4 + +# Verify we have LKDTM available in the kernel. +if [ ! -r $TRIGGER ] ; then + /sbin/modprobe -q lkdtm || true + if [ ! -r $TRIGGER ] ; then + echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)" + else + echo "Cannot write $TRIGGER (need to run as root?)" + fi + # Skip this test + exit $KSELFTEST_SKIP_TEST +fi # Capture dmesg continuously since it may fill up depending on sample size. log=$(mktemp -t stack-entropy-XXXXXX) dmesg --follow >"$log" & pid=$! 
report=-1 for i in $(seq 1 $samples); do - echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT + echo "REPORT_STACK" > $TRIGGER if [ -t 1 ]; then percent=$(( 100 * $i / $samples )) if [ "$percent" -ne "$report" ]; then diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c index 584dc6bc3b06..d2917054fe3a 100644 --- a/tools/testing/selftests/mount/unprivileged-remount-test.c +++ b/tools/testing/selftests/mount/unprivileged-remount-test.c @@ -204,7 +204,7 @@ bool test_unpriv_remount(const char *fstype, const char *mount_options, if (!WIFEXITED(status)) { die("child did not terminate cleanly\n"); } - return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + return WEXITSTATUS(status) == EXIT_SUCCESS; } create_and_enter_userns(); @@ -282,7 +282,7 @@ static bool test_priv_mount_unpriv_remount(void) if (!WIFEXITED(status)) { die("child did not terminate cleanly\n"); } - return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false; + return WEXITSTATUS(status) == EXIT_SUCCESS; } orig_mnt_flags = read_mnt_flags(orig_path); diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c index 860198f83a53..50ed5d475dd1 100644 --- a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c +++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c @@ -191,7 +191,7 @@ static bool is_shared_mount(const char *path) #define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from" #define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to" -static int move_mount_set_group_supported(void) +static bool move_mount_set_group_supported(void) { int ret; @@ -222,7 +222,7 @@ static int move_mount_set_group_supported(void) AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP); umount2("/tmp", MNT_DETACH); - return ret < 0 ? 
false : true; + return ret >= 0; } FIXTURE(move_mount_set_group) { @@ -232,7 +232,7 @@ FIXTURE(move_mount_set_group) { FIXTURE_SETUP(move_mount_set_group) { - int ret; + bool ret; ASSERT_EQ(prepare_unpriv_mountns(), 0); @@ -254,7 +254,7 @@ FIXTURE_SETUP(move_mount_set_group) FIXTURE_TEARDOWN(move_mount_set_group) { - int ret; + bool ret; ret = move_mount_set_group_supported(); ASSERT_GE(ret, 0); @@ -348,7 +348,7 @@ TEST_F(move_mount_set_group, complex_sharing_copying) .shared = false, }; pid_t pid; - int ret; + bool ret; ret = move_mount_set_group_supported(); ASSERT_GE(ret, 0); diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index ad2982b72e02..3f4c8cfe7aca 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -66,6 +66,10 @@ NSB_LO_IP=172.16.2.2 NSA_LO_IP6=2001:db8:2::1 NSB_LO_IP6=2001:db8:2::2 +# non-local addresses for freebind tests +NL_IP=172.17.1.1 +NL_IP6=2001:db8:4::1 + MD5_PW=abc123 MD5_WRONG_PW=abc1234 @@ -316,6 +320,9 @@ addr2str() ${NSB_LO_IP6}) echo "ns-B loopback IPv6";; ${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";; + ${NL_IP}) echo "nonlocal IP";; + ${NL_IP6}) echo "nonlocal IPv6";; + ${VRF_IP}) echo "VRF IP";; ${VRF_IP6}) echo "VRF IPv6";; @@ -1780,6 +1787,14 @@ ipv4_addr_bind_novrf() done # + # raw socket with nonlocal bind + # + a=${NL_IP} + log_start + run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind" + + # # tcp sockets # a=${NSA_IP} @@ -1829,6 +1844,14 @@ ipv4_addr_bind_vrf() log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" # + # raw socket with nonlocal bind + # + a=${NL_IP} + log_start + run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 
0 "Raw socket bind to nonlocal address after VRF bind" + + # # tcp sockets # for a in ${NSA_IP} ${VRF_IP} @@ -1978,6 +2001,7 @@ ipv4_rt() a=${NSA_IP} log_start + run_cmd nettest ${varg} -s & sleep 1 run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & @@ -3418,6 +3442,14 @@ ipv6_addr_bind_novrf() done # + # raw socket with nonlocal bind + # + a=${NL_IP6} + log_start + run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address" + + # # tcp sockets # a=${NSA_IP6} @@ -3462,6 +3494,14 @@ ipv6_addr_bind_vrf() log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind" # + # raw socket with nonlocal bind + # + a=${NL_IP6} + log_start + run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" + + # # tcp sockets # # address on enslaved device is valid for the VRF or device in a VRF @@ -4019,6 +4059,9 @@ usage: ${0##*/} OPTS -p Pause on fail -P Pause after each test -v Be verbose + +Tests: + $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER EOF } @@ -4091,8 +4134,6 @@ do # setup namespaces and config, but do not run any tests setup) setup; exit 0;; vrf_setup) setup "yes"; exit 0;; - - help) echo "Test names: $TESTS"; exit 0;; esac done diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh new file mode 100755 index 000000000000..8748d1b1d95b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -0,0 +1,543 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \ + vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \ + vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \ + vlmc_router_port_test vlmc_filtering_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="239.10.10.10" + +source lib.sh + +h1_create() +{ + 
simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + ip link add l $h1 $h1.10 up type vlan id 10 +} + +h1_destroy() +{ + ip link del $h1.10 + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 + ip link add l $h2 $h2.10 up type vlan id 10 +} + +h2_destroy() +{ + ip link del $h2.10 + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + bridge vlan add vid 10-11 dev $swp1 master + bridge vlan add vid 10-11 dev $swp2 master + + ip link set dev br0 type bridge mcast_vlan_snooping 1 + check_err $? "Could not enable global vlan multicast snooping" + log_test "Vlan multicast snooping enable" +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +vlmc_v2join_test() +{ + local expect=$1 + + RET=0 + ip address add dev $h2.10 $TEST_GROUP/32 autojoin + check_err $? "Could not join $TEST_GROUP" + + sleep 5 + bridge -j mdb show dev br0 | + jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and .vid == 10)" &>/dev/null + if [ $expect -eq 0 ]; then + check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" + else + check_fail $? 
"IGMPv2 report shouldn't have created mdb entry for $TEST_GROUP" + fi + + # check if we need to cleanup + if [ $RET -eq 0 ]; then + ip address del dev $h2.10 $TEST_GROUP/32 2>&1 1>/dev/null + sleep 5 + bridge -j mdb show dev br0 | + jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and \ + .vid == 10)" &>/dev/null + check_fail $? "IGMPv2 leave didn't remove mdb entry for $TEST_GROUP" + fi +} + +vlmc_control_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + log_test "Vlan global options existence" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_snooping == 1) " &>/dev/null + check_err $? "Wrong default mcast_snooping global option value" + log_test "Vlan mcast_snooping global option default value" + + RET=0 + vlmc_v2join_test 0 + bridge vlan global set vid 10 dev br0 mcast_snooping 0 + check_err $? "Could not disable multicast snooping in vlan 10" + vlmc_v2join_test 1 + log_test "Vlan 10 multicast snooping control" +} + +# setup for general query counting +vlmc_query_cnt_xstats() +{ + local type=$1 + local version=$2 + local dev=$3 + + ip -j link xstats type bridge_slave dev $dev | \ + jq -e ".[].multicast.${type}_queries.tx_v${version}" +} + +vlmc_query_cnt_setup() +{ + local type=$1 + local dev=$2 + + if [[ $type == "igmp" ]]; then + tc filter add dev $dev egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv4 dst_ip 224.0.0.1 ip_proto 2 \ + action pass + else + tc filter add dev $dev egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv6 dst_ip ff02::1 ip_proto icmpv6 \ + action pass + fi + + ip link set dev br0 type bridge mcast_stats_enabled 1 +} + +vlmc_query_cnt_cleanup() +{ + local dev=$1 + + ip link set dev br0 type bridge mcast_stats_enabled 0 + tc filter del dev $dev egress pref 10 +} + +vlmc_check_query() +{ + local type=$1 + local version=$2 + 
local dev=$3 + local expect=$4 + local time=$5 + local ret=0 + + vlmc_query_cnt_setup $type $dev + + local pre_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev) + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 + ret=$? + if [[ $ret -eq 0 ]]; then + sleep $time + + local tcstats=$(tc_rule_stats_get $dev 10 egress) + local post_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev) + + if [[ $tcstats != $expect || \ + $(($post_tx_xstats-$pre_tx_xstats)) != $expect || \ + $tcstats != $(($post_tx_xstats-$pre_tx_xstats)) ]]; then + ret=1 + fi + fi + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0 + vlmc_query_cnt_cleanup $dev + + return $ret +} + +vlmc_querier_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_querier == 0) " &>/dev/null + check_err $? "Wrong default mcast_querier global vlan option value" + log_test "Vlan mcast_querier global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 + check_err $? "Could not enable querier in vlan 10" + log_test "Vlan 10 multicast querier enable" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0 + + RET=0 + vlmc_check_query igmp 2 $swp1 1 1 + check_err $? "No vlan tagged IGMPv2 general query packets sent" + log_test "Vlan 10 tagged IGMPv2 general query sent" + + RET=0 + vlmc_check_query mld 1 $swp1 1 1 + check_err $? "No vlan tagged MLD general query packets sent" + log_test "Vlan 10 tagged MLD general query sent" +} + +vlmc_igmp_mld_version_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? 
"Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_igmp_version == 2) " &>/dev/null + check_err $? "Wrong default mcast_igmp_version global vlan option value" + log_test "Vlan mcast_igmp_version global option default value" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_mld_version == 1) " &>/dev/null + check_err $? "Wrong default mcast_mld_version global vlan option value" + log_test "Vlan mcast_mld_version global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3 + check_err $? "Could not set mcast_igmp_version in vlan 10" + log_test "Vlan 10 mcast_igmp_version option changed to 3" + + RET=0 + vlmc_check_query igmp 3 $swp1 1 1 + check_err $? "No vlan tagged IGMPv3 general query packets sent" + log_test "Vlan 10 tagged IGMPv3 general query sent" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2 + check_err $? "Could not set mcast_mld_version in vlan 10" + log_test "Vlan 10 mcast_mld_version option changed to 2" + + RET=0 + vlmc_check_query mld 2 $swp1 1 1 + check_err $? "No vlan tagged MLDv2 general query packets sent" + log_test "Vlan 10 tagged MLDv2 general query sent" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1 +} + +vlmc_last_member_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_last_member_count == 2) " &>/dev/null + check_err $? 
"Wrong default mcast_last_member_count global vlan option value" + log_test "Vlan mcast_last_member_count global option default value" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_last_member_interval == 100) " &>/dev/null + check_err $? "Wrong default mcast_last_member_interval global vlan option value" + log_test "Vlan mcast_last_member_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 3 + check_err $? "Could not set mcast_last_member_count in vlan 10" + log_test "Vlan 10 mcast_last_member_count option changed to 3" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 2 + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 200 + check_err $? "Could not set mcast_last_member_interval in vlan 10" + log_test "Vlan 10 mcast_last_member_interval option changed to 200" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 100 +} + +vlmc_startup_query_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_startup_query_interval == 3125) " &>/dev/null + check_err $? "Wrong default mcast_startup_query_interval global vlan option value" + log_test "Vlan mcast_startup_query_interval global option default value" + + RET=0 + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_startup_query_count == 2) " &>/dev/null + check_err $? "Wrong default mcast_startup_query_count global vlan option value" + log_test "Vlan mcast_startup_query_count global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 100 + check_err $? 
"Could not set mcast_startup_query_interval in vlan 10" + vlmc_check_query igmp 2 $swp1 2 3 + check_err $? "Wrong number of tagged IGMPv2 general queries sent" + log_test "Vlan 10 mcast_startup_query_interval option changed to 100" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 3 + check_err $? "Could not set mcast_startup_query_count in vlan 10" + vlmc_check_query igmp 2 $swp1 3 4 + check_err $? "Wrong number of tagged IGMPv2 general queries sent" + log_test "Vlan 10 mcast_startup_query_count option changed to 3" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 3125 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2 +} + +vlmc_membership_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_membership_interval == 26000) " &>/dev/null + check_err $? "Wrong default mcast_membership_interval global vlan option value" + log_test "Vlan mcast_membership_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 200 + check_err $? "Could not set mcast_membership_interval in vlan 10" + log_test "Vlan 10 mcast_membership_interval option changed to 200" + + RET=0 + vlmc_v2join_test 1 + log_test "Vlan 10 mcast_membership_interval mdb entry expire" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 26000 +} + +vlmc_querier_intvl_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? 
"Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_querier_interval == 25500) " &>/dev/null + check_err $? "Wrong default mcast_querier_interval global vlan option value" + log_test "Vlan mcast_querier_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 100 + check_err $? "Could not set mcast_querier_interval in vlan 10" + log_test "Vlan 10 mcast_querier_interval option changed to 100" + + RET=0 + ip link add dev br1 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 \ + mcast_vlan_snooping 1 + bridge vlan add vid 10 dev br1 self pvid untagged + ip link set dev $h1 master br1 + ip link set dev br1 up + bridge vlan add vid 10 dev $h1 master + bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1 + sleep 2 + ip link del dev br1 + ip addr replace 2001:db8:1::1/64 dev $h1 + vlmc_check_query igmp 2 $swp1 1 1 + check_err $? "Wrong number of IGMPv2 general queries after querier interval" + log_test "Vlan 10 mcast_querier_interval expire after outside query" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 25500 +} + +vlmc_query_intvl_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_query_interval == 12500) " &>/dev/null + check_err $? "Wrong default mcast_query_interval global vlan option value" + log_test "Vlan mcast_query_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 200 + check_err $? 
"Could not set mcast_query_interval in vlan 10" + # 1 is sent immediately, then 2 more in the next 5 seconds + vlmc_check_query igmp 2 $swp1 3 5 + check_err $? "Wrong number of tagged IGMPv2 general queries sent" + log_test "Vlan 10 mcast_query_interval option changed to 200" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 12500 +} + +vlmc_query_response_intvl_test() +{ + RET=0 + local goutput=`bridge -j vlan global show` + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null + check_err $? "Could not find vlan 10's global options" + + echo -n $goutput | + jq -e ".[].vlans[] | select(.vlan == 10 and \ + .mcast_query_response_interval == 1000) " &>/dev/null + check_err $? "Wrong default mcast_query_response_interval global vlan option value" + log_test "Vlan mcast_query_response_interval global option default value" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 200 + check_err $? "Could not set mcast_query_response_interval in vlan 10" + log_test "Vlan 10 mcast_query_response_interval option changed to 200" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000 +} + +vlmc_router_port_test() +{ + RET=0 + local goutput=`bridge -j -d vlan show` + echo -n $goutput | + jq -e ".[] | select(.ifname == \"$swp1\" and \ + .vlans[].vlan == 10)" &>/dev/null + check_err $? "Could not find port vlan 10's options" + + echo -n $goutput | + jq -e ".[] | select(.ifname == \"$swp1\" and \ + .vlans[].vlan == 10 and \ + .vlans[].mcast_router == 1)" &>/dev/null + check_err $? "Wrong default port mcast_router option value" + log_test "Port vlan 10 option mcast_router default value" + + RET=0 + bridge vlan set vid 10 dev $swp1 mcast_router 2 + check_err $? 
"Could not set port vlan 10's mcast_router option" + log_test "Port vlan 10 mcast_router option changed to 2" + + RET=0 + tc filter add dev $swp1 egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass + tc filter add dev $swp2 egress pref 10 prot 802.1Q \ + flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass + bridge vlan set vid 10 dev $swp2 mcast_router 0 + # we need to enable querier and disable query response interval to + # make sure packets are flooded only to router ports + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 \ + mcast_query_response_interval 0 + bridge vlan add vid 10 dev br0 self + sleep 1 + mausezahn br0 -Q 10 -c 10 -p 128 -b 01:00:5e:01:01:01 -B 239.1.1.1 \ + -t udp "dp=1024" &>/dev/null + local swp1_tcstats=$(tc_rule_stats_get $swp1 10 egress) + if [[ $swp1_tcstats != 10 ]]; then + check_err 1 "Wrong number of vlan 10 multicast packets flooded" + fi + local swp2_tcstats=$(tc_rule_stats_get $swp2 10 egress) + check_err $swp2_tcstats "Vlan 10 multicast packets flooded to non-router port" + log_test "Flood unknown vlan multicast packets to router port only" + + tc filter del dev $swp2 egress pref 10 + tc filter del dev $swp1 egress pref 10 + bridge vlan del vid 10 dev br0 self + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000 + bridge vlan set vid 10 dev $swp2 mcast_router 1 + bridge vlan set vid 10 dev $swp1 mcast_router 1 +} + +vlmc_filtering_test() +{ + RET=0 + ip link set dev br0 type bridge vlan_filtering 0 + ip -j -d link show dev br0 | \ + jq -e "select(.[0].linkinfo.info_data.mcast_vlan_snooping == 1)" &>/dev/null + check_fail $? 
"Vlan filtering is disabled but multicast vlan snooping is still enabled" + log_test "Disable multicast vlan snooping when vlan filtering is disabled" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index dfd827b7a9f9..7da783d6f453 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -20,6 +20,7 @@ NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} MCD=${MCD:=smcrouted} MC_CLI=${MC_CLI:=smcroutectl} +PING_COUNT=${PING_COUNT:=10} PING_TIMEOUT=${PING_TIMEOUT:=5} WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} @@ -1111,7 +1112,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null + $PING $args $dip -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT &> /dev/null } ping_test() @@ -1132,7 +1134,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null + $PING6 $args $dip -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT &> /dev/null } ping6_test() diff --git a/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh new file mode 100755 index 000000000000..0548b2b0d416 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# 
+----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1ad) + $swp2 | | +# | | vid 100 pvid untagged vid 100 pvid | | +# | | untagged | | +# | | + vx100 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | | +# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 100 pvid untagged | | | | vid 100 
pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv6 + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 2001:db8:1::2/64 + vlan_create $h2 20 v$h2 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 mcast_snooping 0 + # Make sure the bridge uses the MAC 
address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx100 type vxlan id 1000 \ + local 2001:db8:3::1 dstport "$VXPORT" \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx100 up + + ip link set dev vx100 master br1 + bridge vlan add vid 100 dev vx100 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 100 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 100 dev $swp2 pvid untagged + + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge vlan del vid 100 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 100 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx100 nomaster + ip link set dev vx100 down + ip link del dev vx100 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev 
$in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ + vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + bridge vlan add vid 100 dev w1 pvid untagged + + ip link add name vx100 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx100 up + bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx100 master br2 + tc qdisc add dev vx100 clsact + + bridge vlan add vid 100 dev vx100 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/64 + vlan_create w2 20 vw2 $host_addr2/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 2001:db8:1::3 2001:db8:2::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 2001:db8:1::4 2001:db8:2::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del 
dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 ": local->local" + ping6_test $h1 2001:db8:1::3 ": local->remote 1" + ping6_test $h1 2001:db8:1::4 ": local->remote 2" +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh new file mode 100755 index 000000000000..f4930098974f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh @@ -0,0 +1,504 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------------------+ +-----------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 | +# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 | +# +----|---------------------------+ +-|---------------------------+ +# | | +# +----|------------------------------------------|---------------------------+ +# | SW | | | +# | +--|------------------------------------------|-------------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::1 local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 2001:db8:1::2/64 2001:db8:2::2/64 | | | +# | | | | | 
| +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | 2001:db8:1::3/64 2001:db8:2::3/64 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 2001:db8:4::1/64 2001:db8:3::1/128 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 2001:db8:4::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 2001:db8:5::2/64 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 2001:db8:5::1/64 2001:db8:3::2/128 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | | | | +# | | + vlan10 vlan20 + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::2 local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | | +# | | id 1000 id 2000 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | 
+--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | | +# | | default via | | default via | | +# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv6 + arp_decap +" +NUM_NETIFS=6 +source lib.sh + +require_command $ARPING + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/64 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/64 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +h2_destroy() +{ + hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. 
+ ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:4::1/64 + ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2 + + ip link add name vx10 type vxlan id 1000 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 20 dev $swp2 pvid untagged + + ip address add 2001:db8:3::1/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::2/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::2/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + +} + +switch_destroy() +{ + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev 
br1 self local vlan 10 + + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 2001:db8:3::1/128 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2 + ip address del dev $rp1 2001:db8:4::1/64 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 2001:db8:4::2/64 dev $rp2 + ip address add 2001:db8:5::2/64 dev v1 + + ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 +} + +spine_destroy() +{ + ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 + ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + + ip address del 2001:db8:5::2/64 dev v1 + ip address del 2001:db8:4::2/64 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip 
link set dev v2 up + ip address add dev v2 2001:db8:5::1/64 + ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2 + + ip link add name vx10 type vxlan id 1000 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add vid 20 dev w3 pvid untagged + + ip address add 2001:db8:3::2/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::3/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::3/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init 
+ +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +macs_populate() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f macs_populate + +macs_initialize() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + macs_populate $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \ + 2001:db8:3::2 + in_ns ns1 macs_populate $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \ + 2001:db8:3::1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + in_ns ns1 forwarding_enable + + macs_initialize +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20" + ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10" + ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20" + ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20" + ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10" +} + +arp_decap() +{ + # Repeat the ping tests, but without populating the neighbours. 
This + # makes sure we correctly decapsulate ARP packets + log_info "deleting neighbours from vlan interfaces" + + ip neigh del 2001:db8:1::4 dev vlan10 + ip neigh del 2001:db8:2::4 dev vlan20 + + ping_ipv6 + + ip neigh replace 2001:db8:1::4 lladdr $(in_ns ns1 mac_get w2) \ + nud noarp dev vlan10 extern_learn + ip neigh replace 2001:db8:2::4 lladdr $(in_ns ns1 mac_get w4) \ + nud noarp dev vlan20 extern_learn +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh new file mode 100755 index 000000000000..ac97f07e5ce8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh @@ -0,0 +1,804 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# 
============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + test_ttl + test_tos + test_ecn_encap + test_ecn_decap + reapply_config + ping_ipv4 + ping_ipv6 + test_flood + test_unicast +"} + +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + tc qdisc add dev $h1 clsact +} 
+ +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + tc qdisc add dev $rp1 clsact + + ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + tc qdisc del dev $rp1 clsact + rp1_unset_addr + ip link set dev 
$rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr_ipv4=$1; shift + local host_addr_ipv6=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr_ipv4/28 $host_addr_ipv6/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 192.0.2.3 2001:db8:1::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 192.0.2.4 2001:db8:1::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + 
swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx1 is the first device to get +# attached to the bridge, and at that point the local IP is already +# configured. Try the other scenario of attaching the devices to an +# already-offloaded bridge, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + ip link set dev vx1 nomaster + rp1_unset_addr + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self + sleep 1 + rp1_set_addr + sleep 5 +} + +__ping_ipv4() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome ARP noise. + PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv4() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_ip=192.0.2.1 + local w2_ns1_ip=192.0.2.3 + local w2_ns2_ip=192.0.2.4 + + ping_test $h1 192.0.2.2 ": local->local" + + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \ + "local->remote 1" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \ + "local->remote 2" +} + +__ping_ipv6() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip $dst_ip dst_ip $src_ip $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome neighbor discovery noise. + PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + check_err $?
"Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping6: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv6() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_ip=2001:db8:1::1 + local w2_ns1_ip=2001:db8:1::3 + local w2_ns2_ip=2001:db8:1::4 + + ping6_test $h1 2001:db8:1::2 ": local->local" + + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \ + "local->remote 1" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \ + "local->remote 2" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_hw if that fails.
+ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_sw action pass 2>/dev/null || \ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_hw action pass +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $dst $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ -6 $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." 
+ done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $dst $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 2001:db8:1::100 "flood" +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 2001:db8:4::1" + "$r2_mac vx1 2001:db8:5::1") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 2001:db8:1::2 0 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:1::3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:1::4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping6_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." 
+} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 2001:db8:1::3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: envelope TTL" +} + +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_tos 0x14 action pass + vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x14" v1 egress 77 10 + vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x18" v1 egress 77 0 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: envelope TOS inheritance" +} + +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 protocol ipv6 \ + flower ip_tos $tos action pass + sleep 1 + vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 protocol ipv6 + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" + local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01" + + $MZ -6 $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"86:dd:"$( : ETH type + )"6"$( : IP version + )"$inner_tos"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:08:"$( : Payload length + )"3a:"$( : Next header + )"04:"$( : Hop limit + )"$saddr:"$( : IP saddr + )"$daddr:"$( 
: IP daddr + )"80:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"00:"$( : ICMPv6.checksum + ) +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + sleep 1 + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 protocol ipv6 \ + flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 \ + ip_tos $decapped_tos action drop + sleep 1 + vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos="0:0" + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap "0:0" 00 0x00 + __test_ecn_decap "0:0" 01 0x00 + __test_ecn_decap "0:0" 02 0x00 + # 00 03 is tested in test_ecn_decap_error() + __test_ecn_decap "0:1" 00 0x01 + __test_ecn_decap "0:1" 01 0x01 + __test_ecn_decap "0:1" 02 0x01 + __test_ecn_decap "0:1" 03 0x03 + __test_ecn_decap "0:2" 00 0x02 + __test_ecn_decap "0:2" 01 
0x01 + __test_ecn_decap "0:2" 02 0x02 + __test_ecn_decap "0:2" 03 0x03 + __test_ecn_decap "0:3" 00 0x03 + __test_ecn_decap "0:3" 01 0x03 + __test_ecn_decap "0:3" 02 0x03 + __test_ecn_decap "0:3" 03 0x03 + test_ecn_decap_error +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh new file mode 100755 index 000000000000..00540317737a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" +source vxlan_bridge_1d_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index a5789721ba92..a596bbf3ed6a 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -680,26 +680,6 @@ test_pvid() log_test "VXLAN: flood after vlan re-add" } -vxlan_ping_test() -{ - local ping_dev=$1; shift - local ping_dip=$1; shift - local ping_args=$1; shift - local capture_dev=$1; shift - local capture_dir=$1; shift - local capture_pref=$1; shift - local expect=$1; shift - - local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) - ping_do $ping_dev $ping_dip "$ping_args" - local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) - local delta=$((t1 - t0)) - - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) - check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." 
-} - __test_learning() { local -a expects=(0 0 0 0 0) diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh new file mode 100755 index 000000000000..d880df89bc8b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh @@ -0,0 +1,837 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local: local: | | +# | | 2001:db8:3::1 2001:db8:3::1 | | +# | | remote: remote: | | +# | | 2001:db8:4::1 2001:db8:5::1 2001:db8:4::1 2001:db8:5::1 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 2001:db8:4::0/64 via 2001:db8:3::2 | +# | 2001:db8:5::0/64 via 2001:db8:3::2 | +# | | +# | + $rp1 | +# | | 2001:db8:3::1/64 | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 2001:db8:3::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW 
+# | + v1 (veth) + v3 (veth) | +# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 | +# +----|---------------------------------------|------------------+ +# | | +# +----|--------------------------------+ +----|-------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 | +# | | | | +# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 | +# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via | +# | | | 2001:db8:5::2 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | | +# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | | +# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | 
| 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-------------------------------------+ +------------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + reapply_config + ping_ipv4 + ping_ipv6 + test_flood + test_unicast + test_pvid + ping_ipv4 + ping_ipv6 + test_flood + test_pvid +"} + +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 2001:db8:1::2/64 + vlan_create $h2 20 v$h2 198.51.100.2/24 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 2001:db8:3::1/64 + + ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2 +} + +rp1_unset_addr() +{ + ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2 + ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2 + + ip address del dev $rp1 2001:db8:3::1/64 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. 
+ ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + tc qdisc add dev $rp1 clsact + + ip link add name vx10 type vxlan id 1000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local 2001:db8:3::1 \ + dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \ + tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 
dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + tc qdisc del dev $rp1 clsact + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 2001:db8:3::2/64 + __simple_if_init v1 v$rp2 2001:db8:4::2/64 + __simple_if_init v3 v$rp2 2001:db8:5::2/64 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 2001:db8:5::2/64 + __simple_if_fini v1 2001:db8:4::2/64 + simple_if_fini $rp2 2001:db8:3::2/64 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1_ipv4=$1; shift + local host_addr1_ipv6=$1; shift + local host_addr2_ipv4=$1; shift + local host_addr2_ipv6=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/64 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" udp6zerocsumrx + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:3::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init 
w2 + vlan_create w2 10 vw2 $host_addr1_ipv4/28 $host_addr1_ipv6/64 + vlan_create w2 20 vw2 $host_addr2_ipv4/24 $host_addr2_ipv6/64 + + ip route add 2001:db8:3::0/64 nexthop via $nh_addr + ip route add $other_in_addr/128 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \ + 192.0.2.3 2001:db8:1::3 198.51.100.3 2001:db8:2::3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \ + 192.0.2.4 2001:db8:1::4 198.51.100.4 2001:db8:2::4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and at that point the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. 
+reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self + + rp1_set_addr + sleep 5 +} + +__ping_ipv4() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \ + flower vlan_ethtype ipv4 src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome ARP noise. + PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100 + check_err $? "Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100 + check_err $?
"Decapsulated packets did not go through switch" + + log_test "ping: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv4() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_10_ip=192.0.2.1 + local h1_20_ip=198.51.100.1 + local w2_10_ns1_ip=192.0.2.3 + local w2_10_ns2_ip=192.0.2.4 + local w2_20_ns1_ip=198.51.100.3 + local w2_20_ns2_ip=198.51.100.4 + + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \ + "local->remote 1 vid 10" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \ + "local->remote 2 vid 10" + __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \ + "local->remote 1 vid 20" + __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \ + "local->remote 2 vid 20" +} + +__ping_ipv6() +{ + local vxlan_local_ip=$1; shift + local vxlan_remote_ip=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local info=$1; shift + + RET=0 + + tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \ + flower ip_proto udp src_ip $vxlan_local_ip \ + dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass + # Match ICMP-reply packets after decapsulation, so source IP is + # destination IP of the ping and destination IP is source IP of the + # ping. + tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \ + flower vlan_ethtype ipv6 src_ip $dst_ip dst_ip $src_ip \ + $TC_FLAG action pass + + # Send 100 packets and verify that at least 100 packets hit the rule, + # to overcome neighbor discovery noise. + PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $rp1 egress" 101 100 + check_err $?
"Encapsulated packets did not go through router" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 100 + check_err $? "Decapsulated packets did not go through switch" + + log_test "ping6: $info" + + tc filter del dev $swp1 egress + tc filter del dev $rp1 egress +} + +ping_ipv6() +{ + RET=0 + + local local_sw_ip=2001:db8:3::1 + local remote_ns1_ip=2001:db8:4::1 + local remote_ns2_ip=2001:db8:5::1 + local h1_10_ip=2001:db8:1::1 + local h1_20_ip=2001:db8:2::1 + local w2_10_ns1_ip=2001:db8:1::3 + local w2_10_ns2_ip=2001:db8:1::4 + local w2_20_ns1_ip=2001:db8:2::3 + local w2_20_ns2_ip=2001:db8:2::4 + + ping6_test $h1.10 2001:db8:1::2 ": local->local vid 10" + ping6_test $h1.20 2001:db8:2::2 ": local->local vid 20" + + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \ + "local->remote 1 vid 10" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \ + "local->remote 2 vid 10" + __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \ + "local->remote 1 vid 20" + __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \ + "local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dst_ip=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_hw if that fails.
+ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_sw action pass 2>/dev/null || \ + $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \ + proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \ + icmpv6 skip_hw action pass +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $dst $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ -6 $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." 
+ done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $dst $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 2001:db8:1::100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 2001:db8:2::100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 2001:db8:4::1" + "$r2_mac vx10 2001:db8:5::1") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 2001:db8:1::2 0 10 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:1::3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 2001:db8:1::4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 2001:db8:4::1" \ + "$r2_mac vx20 2001:db8:5::1") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 2001:db8:2::2 0 20 "local MAC unicast" + __test_unicast $r1_mac 2001:db8:2::3 2 20 "remote MAC 
1 unicast" + __test_unicast $r2_mac 2001:db8:2::4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=2001:db8:1::100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git 
a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh new file mode 100755 index 000000000000..344f43ccb755 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 + ping_ipv6 +" +source vxlan_bridge_1q_ipv6.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh new file mode 100755 index 000000000000..904633427fd0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh @@ -0,0 +1,563 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + + +# +--------------------------------+ +-----------------------------+ +# | vrf-h1 | | vrf-h2 | +# | + $h1 | | + $h2 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 | +# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 | +# +----|---------------------------+ +-|---------------------------+ +# | | +# +----|------------------------------------------|---------------------------+ +# | SW | | | +# | +--|------------------------------------------|-------------------------+ | +# | | + $swp1 br1 + $swp2 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::1 local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 2001:db8:3::1 | | +# | | remote 2001:db8:3::2 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | +# | 
+-----------------------------------|-----------------------------------+ | +# | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | | | +# | | + vlan10 | vlan20 + | | +# | | | 2001:db8:1::2/64 | 2001:db8:2::2/64 | | | +# | | | | | | | +# | | + vlan10-v (macvlan) + vlan20-v (macvlan) + | | +# | | 2001:db8:1::3/64 vlan4001 2001:db8:2::3/64 | | +# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 2001:db8:4::1/64 2001:db8:3::1 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | vrf-spine | +# | + $rp2 | +# | 2001:db8:4::2/64 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | | +# | + v1 (veth) | +# | | 2001:db8:5::2/64 | +# +----|--------------------------------------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | + v2 (veth) +lo NS1 (netns) | +# | 2001:db8:5::1/64 2001:db8:3::2/128 | +# | | +# | +-----------------------------------------------------------------------+ | +# | | vrf-green | | +# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | | +# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | | +# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | | +# | | | vlan4001 | | | +# | | + vlan10 + vlan20 + | | +# | | | 2001:db8:1::3/64 | 2001:db8:2::3/64 | | | +# | | | | | | | +# | | +--------------------------------+--------------------------------+ | | +# | | | | | +# | +-----------------------------------|-----------------------------------+ | +# | | | +# | +-----------------------------------+-----------------------------------+ | +# | | | | +# | | + vx10 + vx20 | | +# | | local 2001:db8:3::2 local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 remote 
2001:db8:3::1 | | +# | | id 1010 id 1020 | | +# | | dstport 4789 dstport 4789 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | | | | +# | | + vx4001 | | +# | | local 2001:db8:3::2 | | +# | | remote 2001:db8:3::1 | | +# | | id 104001 | | +# | | dstport 4789 | | +# | | vid 4001 pvid untagged | | +# | | | | +# | | + w1 (veth) + w3 (veth) | | +# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | | +# | +--|------------------------------------------|-------------------------+ | +# | | | | +# | | | | +# | +--|----------------------+ +--|-------------------------+ | +# | | | vrf-h1 | | | vrf-h2 | | +# | | + w2 (veth) | | + w4 (veth) | | +# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | | +# | | default via | | default via | | +# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | | +# | +-------------------------+ +----------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv6 +" +NUM_NETIFS=6 +source lib.sh + +hx_create() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + ip address add $ip_addr/64 dev $if_name + ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \ + dev $if_name + ip route add default vrf $vrf_name nexthop via $gw_ip +} +export -f hx_create + +hx_destroy() +{ + local vrf_name=$1; shift + local if_name=$1; shift + local ip_addr=$1; shift + local gw_ip=$1; shift + + ip route del default vrf $vrf_name nexthop via $gw_ip + ip neigh del $gw_ip dev $if_name + ip address del $ip_addr/64 dev $if_name + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +h1_create() +{ + hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h1_destroy() +{ + hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3 +} + +h2_create() +{ + hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + 
+h2_destroy() +{ + hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 2001:db8:4::1/64 + ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2 + + ip link add name vx10 type vxlan id 1010 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + ip link add name vx4001 type vxlan id 104001 \ + local 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip address add 2001:db8:3::1/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::2/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::2/64 dev vlan20 + ip link add link vlan20 name 
vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 20 dev $swp2 pvid untagged +} + +switch_destroy() +{ + bridge vlan del vid 20 dev br1 self + bridge vlan del vid 10 dev br1 self + + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20 + bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10 + + bridge vlan del vid 4001 dev br1 self + ip link del dev vlan4001 + + ip link del dev vlan20 + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 2001:db8:3::1/128 dev lo + + bridge vlan del vid 20 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 4001 dev vx4001 + ip link set dev vx4001 nomaster + + ip link set dev vx4001 down + ip link del dev vx4001 + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2 + ip address del dev $rp1 2001:db8:4::1/64 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +spine_create() +{ + vrf_create "vrf-spine" + ip link set dev $rp2 master vrf-spine + ip link set dev v1 master vrf-spine + ip link set dev vrf-spine up + ip link set dev $rp2 up + ip link set dev v1 up + + ip address add 2001:db8:4::2/64 dev $rp2 + ip address add 
2001:db8:5::2/64 dev v1 + + ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 +} + +spine_destroy() +{ + ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \ + 2001:db8:5::1 + ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \ + 2001:db8:4::1 + + ip address del 2001:db8:5::2/64 dev v1 + ip address del 2001:db8:4::2/64 dev $rp2 + + ip link set dev v1 down + ip link set dev $rp2 down + vrf_destroy "vrf-spine" +} + +ns_h1_create() +{ + hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3 +} +export -f ns_h1_create + +ns_h2_create() +{ + hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3 +} +export -f ns_h2_create + +ns_switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + ip link set dev br1 up + + ip link set dev v2 up + ip address add dev v2 2001:db8:5::1/64 + ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2 + + ip link add name vx10 type vxlan id 1010 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 1020 \ + local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link add name vx4001 type vxlan id 104001 \ + local 2001:db8:3::2 dstport 4789 \ + nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + ip link set dev w1 master br1 + ip link set dev w1 up + bridge vlan add vid 10 dev w1 pvid untagged + + ip link set dev w3 master br1 + ip link set dev w3 up + bridge vlan add 
vid 20 dev w3 pvid untagged + + ip address add 2001:db8:3::2/128 dev lo + + # Create SVIs + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + ip address add 2001:db8:1::3/64 dev vlan10 + ip link add link vlan10 name vlan10-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:1::3/64 dev vlan10-v + + ip link add link br1 name vlan20 up master vrf-green type vlan id 20 + ip address add 2001:db8:2::3/64 dev vlan20 + ip link add link vlan20 name vlan20-v up master vrf-green \ + address 00:00:5e:00:01:01 type macvlan mode private + ip address add 2001:db8:2::3/64 dev vlan20-v + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 20 dev br1 self + bridge vlan add vid 4001 dev br1 self + + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10 + bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20 +} +export -f ns_switch_create + +ns_init() +{ + ip link add name w1 type veth peer name w2 + ip link add name w3 type veth peer name w4 + + ip link set dev lo up + + ns_h1_create + ns_h2_create + ns_switch_create +} +export -f ns_init + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 ns_init +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +__l2_vni_init() +{ + local mac1=$1; shift + local mac2=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local dst=$1; shift + + bridge fdb add $mac1 dev vx10 self master extern_learn static \ + dst $dst vlan 10 + bridge fdb add $mac2 dev vx20 self master extern_learn static \ + dst $dst vlan 20 + + ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \ + extern_learn + ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \ + extern_learn +} +export -f __l2_vni_init + +l2_vni_init() +{ + local h1_ns_mac=$(in_ns ns1 mac_get w2) + local 
h2_ns_mac=$(in_ns ns1 mac_get w4) + local h1_mac=$(mac_get $h1) + local h2_mac=$(mac_get $h2) + + __l2_vni_init $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \ + 2001:db8:3::2 + in_ns ns1 __l2_vni_init $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \ + 2001:db8:3::1 +} + +__l3_vni_init() +{ + local mac=$1; shift + local vtep_ip=$1; shift + local host1_ip=$1; shift + local host2_ip=$1; shift + + bridge fdb add $mac dev vx4001 self master extern_learn static \ + dst $vtep_ip vlan 4001 + + ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn + + ip route add $host1_ip/128 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink + ip route add $host2_ip/128 vrf vrf-green nexthop via $vtep_ip \ + dev vlan4001 onlink +} +export -f __l3_vni_init + +l3_vni_init() +{ + local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001) + local vlan4001_mac=$(mac_get vlan4001) + + __l3_vni_init $vlan4001_ns_mac 2001:db8:3::2 2001:db8:1::4 \ + 2001:db8:2::4 + in_ns ns1 __l3_vni_init $vlan4001_mac 2001:db8:3::1 2001:db8:1::1 \ + 2001:db8:2::1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + spine_create + ns1_create + in_ns ns1 forwarding_enable + + l2_vni_init + l3_vni_init +} + +cleanup() +{ + pre_cleanup + + ns1_destroy + spine_destroy + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20" + ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10" + ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20" + ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20" + ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit 
$EXIT_STATUS diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index cf37ce86b0fd..30024d0ed373 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -57,17 +57,14 @@ #include <string.h> #include <unistd.h> +#include "../kselftest.h" + #define DPORT 8000 #define SPORT 1500 #define PAYLOAD_LEN 100 -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define NUM_PACKETS 4 #define START_SEQ 100 #define START_ACK 100 -#define SIP6 "fdaa::2" -#define DIP6 "fdaa::1" -#define SIP4 "192.168.1.200" -#define DIP4 "192.168.1.100" #define ETH_P_NONE 0 #define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) #define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) @@ -75,6 +72,10 @@ #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +static const char *addr6_src = "fdaa::2"; +static const char *addr6_dst = "fdaa::1"; +static const char *addr4_src = "192.168.1.200"; +static const char *addr4_dst = "192.168.1.100"; static int proto = -1; static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; static char *testname = "data"; @@ -178,18 +179,18 @@ static uint16_t tcp_checksum(void *buf, int payload_len) uint32_t sum = 0; if (proto == PF_INET6) { - if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1) + if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1) error(1, errno, "inet_pton6 source ip pseudo"); - if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1) + if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1) error(1, errno, "inet_pton6 dest ip pseudo"); ph6.protocol = htons(IPPROTO_TCP); ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); sum = checksum_nofold(&ph6, sizeof(ph6), 0); } else if (proto == PF_INET) { - if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1) + if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1) error(1, errno, "inet_pton source ip pseudo"); - if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 
1) + if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1) error(1, errno, "inet_pton dest ip pseudo"); ph4.protocol = htons(IPPROTO_TCP); ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); @@ -229,9 +230,9 @@ static void fill_networklayer(void *buf, int payload_len) ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); ip6h->nexthdr = IPPROTO_TCP; ip6h->hop_limit = 8; - if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1) + if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) error(1, errno, "inet_pton source ip6"); - if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1) + if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1) error(1, errno, "inet_pton dest ip6"); } else if (proto == PF_INET) { memset(iph, 0, sizeof(*iph)); @@ -243,9 +244,9 @@ static void fill_networklayer(void *buf, int payload_len) iph->tot_len = htons(sizeof(struct tcphdr) + payload_len + sizeof(struct iphdr)); iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ - if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1) + if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1) error(1, errno, "inet_pton source ip"); - if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1) + if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1) error(1, errno, "inet_pton dest ip"); iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); } @@ -731,7 +732,7 @@ static void set_timeout(int fd) { struct timeval timeout; - timeout.tv_sec = 120; + timeout.tv_sec = 3; timeout.tv_usec = 0; if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, sizeof(timeout)) < 0) @@ -1023,11 +1024,13 @@ static void gro_receiver(void) static void parse_args(int argc, char **argv) { static const struct option opts[] = { + { "daddr", required_argument, NULL, 'd' }, { "dmac", required_argument, NULL, 'D' }, { "iface", required_argument, NULL, 'i' }, { "ipv4", no_argument, NULL, '4' }, { "ipv6", no_argument, NULL, '6' }, { "rx", no_argument, NULL, 'r' }, + { "saddr", required_argument, NULL, 's' }, { "smac", required_argument, NULL, 'S' 
}, { "test", required_argument, NULL, 't' }, { "verbose", no_argument, NULL, 'v' }, @@ -1035,7 +1038,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1045,6 +1048,9 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'd': + addr4_dst = addr6_dst = optarg; + break; case 'D': dmac = optarg; break; @@ -1054,6 +1060,9 @@ static void parse_args(int argc, char **argv) case 'r': tx_socket = false; break; + case 's': + addr4_src = addr6_src = optarg; + break; case 'S': smac = optarg; break; @@ -1091,5 +1100,7 @@ int main(int argc, char **argv) gro_sender(); else gro_receiver(); + + fprintf(stderr, "Gro::%s test passed.\n", testname); return 0; } diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index 3d7dde2c321b..cc10c10c5ed9 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -41,7 +41,6 @@ #define pr_err(fmt, ...) 
printk(fmt ": %m", ##__VA_ARGS__) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 7569d892967a..49daae73c41e 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_inq mptcp_sockopt pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index bbf4e448bad9..0356c4501c99 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -8,7 +8,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt +TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq TEST_FILES := settings diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 2b82628decb1..d36b7da5082a 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -12,5 +12,10 @@ CONFIG_NF_TABLES=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NF_TABLES_IPV4=y -CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_TPROXY=m +CONFIG_NFT_SOCKET=m +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IPV6_MULTIPLE_TABLES=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 95e81d557b08..8628aa61b763 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ 
b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -16,6 +16,7 @@ #include <unistd.h> #include <time.h> +#include <sys/ioctl.h> #include <sys/poll.h> #include <sys/sendfile.h> #include <sys/stat.h> @@ -28,6 +29,7 @@ #include <linux/tcp.h> #include <linux/time_types.h> +#include <linux/sockios.h> extern int optind; @@ -59,7 +61,6 @@ static enum cfg_peek cfg_peek = CFG_NONE_PEEK; static const char *cfg_host; static const char *cfg_port = "12000"; static int cfg_sock_proto = IPPROTO_MPTCP; -static bool tcpulp_audit; static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; @@ -69,32 +70,56 @@ static unsigned int cfg_time; static unsigned int cfg_do_w; static int cfg_wait; static uint32_t cfg_mark; +static char *cfg_input; +static int cfg_repeat = 1; struct cfg_cmsg_types { unsigned int cmsg_enabled:1; unsigned int timestampns:1; + unsigned int tcp_inq:1; }; +struct cfg_sockopt_types { + unsigned int transparent:1; +}; + +struct tcp_inq_state { + unsigned int last; + bool expect_eof; +}; + +static struct tcp_inq_state tcp_inq; + static struct cfg_cmsg_types cfg_cmsg_types; +static struct cfg_sockopt_types cfg_sockopt_types; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" - "[-l] [-w sec] [-t num] [-T num] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] " + "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] " + "[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); + fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); + fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); + fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. 
In listen mode accepts num " + "incoming connections, in client mode, disconnect and reconnect to the server\n"); + fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " + "-- for MPJ tests\n"); + fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); + fprintf(stderr, "\t-M mark -- set socket packet mark\n"); + fprintf(stderr, "\t-o option -- test sockopt <option>\n"); + fprintf(stderr, "\t-p num -- use port num\n"); + fprintf(stderr, + "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); fprintf(stderr, "\t-t num -- set poll timeout to num\n"); fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); - fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " + "-- for remove addr tests\n"); fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); - fprintf(stderr, "\t-p num -- use port num\n"); fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); - fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); - fprintf(stderr, "\t-M mark -- set socket packet mark\n"); - fprintf(stderr, "\t-u -- check mptcp ulp\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); - fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); - fprintf(stderr, - "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); exit(1); } @@ -185,6 +210,58 @@ static void set_mark(int fd, uint32_t mark) } } +static void set_transparent(int fd, int pf) +{ + int one = 1; + + switch (pf) { + case AF_INET: + if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) + perror("IP_TRANSPARENT"); + break; + case AF_INET6: + if (-1 == setsockopt(fd, IPPROTO_IPV6, 
IPV6_TRANSPARENT, &one, sizeof(one))) + perror("IPV6_TRANSPARENT"); + break; + } +} + +static int do_ulp_so(int sock, const char *name) +{ + return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); +} + +#define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) +static void sock_test_tcpulp(int sock, int proto, unsigned int line) +{ + socklen_t buflen = 8; + char buf[8] = ""; + int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); + + if (ret != 0) + X("getsockopt"); + + if (buflen > 0) { + if (strcmp(buf, "mptcp") != 0) + xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line); + ret = do_ulp_so(sock, "tls"); + if (ret == 0) + X("setsockopt"); + } else if (proto == IPPROTO_MPTCP) { + ret = do_ulp_so(sock, "tls"); + if (ret != -1) + X("setsockopt"); + } + + ret = do_ulp_so(sock, "mptcp"); + if (ret != -1) + X("setsockopt"); + +#undef X +} + +#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) + static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { @@ -208,10 +285,15 @@ static int sock_listen_mptcp(const char * const listenaddr, if (sock < 0) continue; + SOCK_TEST_TCPULP(sock, cfg_sock_proto); + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) perror("setsockopt"); + if (cfg_sockopt_types.transparent) + set_transparent(sock, pf); + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) break; /* success */ @@ -227,54 +309,22 @@ static int sock_listen_mptcp(const char * const listenaddr, return sock; } + SOCK_TEST_TCPULP(sock, cfg_sock_proto); + if (listen(sock, 20)) { perror("listen"); close(sock); return -1; } - return sock; -} + SOCK_TEST_TCPULP(sock, cfg_sock_proto); -static bool sock_test_tcpulp(const char * const remoteaddr, - const char * const port) -{ - struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, - .ai_socktype = SOCK_STREAM, - }; - struct addrinfo *a, *addr; - int sock = -1, ret = 0; - bool test_pass 
= false; - - hints.ai_family = AF_INET; - - xgetaddrinfo(remoteaddr, port, &hints, &addr); - for (a = addr; a; a = a->ai_next) { - sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP); - if (sock < 0) { - perror("socket"); - continue; - } - ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp", - sizeof("mptcp")); - if (ret == -1 && errno == EOPNOTSUPP) - test_pass = true; - close(sock); - - if (test_pass) - break; - if (!ret) - fprintf(stderr, - "setsockopt(TCP_ULP) returned 0\n"); - else - perror("setsockopt(TCP_ULP)"); - } - return test_pass; + return sock; } static int sock_connect_mptcp(const char * const remoteaddr, - const char * const port, int proto) + const char * const port, int proto, + struct addrinfo **peer) { struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, @@ -293,11 +343,15 @@ static int sock_connect_mptcp(const char * const remoteaddr, continue; } + SOCK_TEST_TCPULP(sock, proto); + if (cfg_mark) set_mark(sock, cfg_mark); - if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { + *peer = a; break; /* success */ + } perror("connect()"); close(sock); @@ -305,6 +359,8 @@ static int sock_connect_mptcp(const char * const remoteaddr, } freeaddrinfo(addr); + if (sock != -1) + SOCK_TEST_TCPULP(sock, proto); return sock; } @@ -364,7 +420,9 @@ static size_t do_write(const int fd, char *buf, const size_t len) static void process_cmsg(struct msghdr *msgh) { struct __kernel_timespec ts; + bool inq_found = false; bool ts_found = false; + unsigned int inq = 0; struct cmsghdr *cmsg; for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { @@ -373,12 +431,27 @@ static void process_cmsg(struct msghdr *msgh) ts_found = true; continue; } + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); + inq_found = true; + continue; + } + } if (cfg_cmsg_types.timestampns) { if (!ts_found) xerror("TIMESTAMPNS not present\n"); } + + if 
(cfg_cmsg_types.tcp_inq) { + if (!inq_found) + xerror("TCP_INQ not present\n"); + + if (inq > 1024) + xerror("tcp_inq %u is larger than one kbyte\n", inq); + tcp_inq.last = inq; + } } static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) @@ -395,10 +468,23 @@ static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) .msg_controllen = sizeof(msg_buf), }; int flags = 0; + unsigned int last_hint = tcp_inq.last; int ret = recvmsg(fd, &msg, flags); - if (ret <= 0) + if (ret <= 0) { + if (ret == 0 && tcp_inq.expect_eof) + return ret; + + if (ret == 0 && cfg_cmsg_types.tcp_inq) + if (last_hint != 1 && last_hint != 0) + xerror("EOF but last tcp_inq hint was %u\n", last_hint); + return ret; + } + + if (tcp_inq.expect_eof) + xerror("expected EOF, last_hint %u, now %u\n", + last_hint, tcp_inq.last); if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) xerror("got %lu bytes of cmsg data, expected 0\n", @@ -410,6 +496,19 @@ static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) if (msg.msg_controllen) process_cmsg(&msg); + if (cfg_cmsg_types.tcp_inq) { + if ((size_t)ret < len && last_hint > (unsigned int)ret) { + if (ret + 1 != (int)last_hint) { + int next = read(fd, msg_buf, sizeof(msg_buf)); + + xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", + ret, (unsigned int)len, last_hint, tcp_inq.last, next); + } else { + tcp_inq.expect_eof = true; + } + } + } + return ret; } @@ -441,14 +540,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) return ret; } -static void set_nonblock(int fd) +static void set_nonblock(int fd, bool nonblock) { int flags = fcntl(fd, F_GETFL); if (flags == -1) return; - fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (nonblock) + fcntl(fd, F_SETFL, flags | O_NONBLOCK); + else + fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); } static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out) @@ -460,7 +562,7 @@ static int 
copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after unsigned int woff = 0, wlen = 0; char wbuf[8192]; - set_nonblock(peerfd); + set_nonblock(peerfd, true); for (;;) { char rbuf[8192]; @@ -555,7 +657,6 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after if (cfg_remove) usleep(cfg_wait); - close(peerfd); return 0; } @@ -697,7 +798,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd, return err; } -static int copyfd_io(int infd, int peerfd, int outfd) +static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd) { bool in_closed_after_out = false; struct timespec start, end; @@ -736,6 +837,9 @@ static int copyfd_io(int infd, int peerfd, int outfd) if (ret) return ret; + if (close_peerfd) + close(peerfd); + if (cfg_time) { unsigned int delta_ms; @@ -847,7 +951,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!(cfg_join || cfg_remove) && (r & 1)) + if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) close(fd); } @@ -857,7 +961,9 @@ int main_loop_s(int listensock) struct pollfd polls; socklen_t salen; int remotesock; + int fd = 0; +again: polls.fd = listensock; polls.events = POLLIN; @@ -878,12 +984,27 @@ int main_loop_s(int listensock) check_sockaddr(pf, &ss, salen); check_getpeername(remotesock, &ss, salen); - return copyfd_io(0, remotesock, 1); + if (cfg_input) { + fd = open(cfg_input, O_RDONLY); + if (fd < 0) + xerror("can't open %s: %d", cfg_input, errno); + } + + SOCK_TEST_TCPULP(remotesock, 0); + + copyfd_io(fd, remotesock, 1, true); + } else { + perror("accept"); + return 1; } - perror("accept"); + if (--cfg_repeat > 0) { + if (cfg_input) + close(fd); + goto again; + } - return 1; + return 0; } static void init_rng(void) @@ -919,6 +1040,8 @@ static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) if (cmsg->timestampns) xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); + if (cmsg->tcp_inq) + xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, 
&on, sizeof(on)); } static void parse_cmsg_types(const char *type) @@ -940,21 +1063,81 @@ static void parse_cmsg_types(const char *type) return; } + if (strncmp(type, "TCPINQ", len) == 0) { + cfg_cmsg_types.tcp_inq = 1; + return; + } + fprintf(stderr, "Unrecognized cmsg option %s\n", type); exit(1); } +static void parse_setsock_options(const char *name) +{ + char *next = strchr(name, ','); + unsigned int len = 0; + + if (next) { + parse_setsock_options(next + 1); + len = next - name; + } else { + len = strlen(name); + } + + if (strncmp(name, "TRANSPARENT", len) == 0) { + cfg_sockopt_types.transparent = 1; + return; + } + + fprintf(stderr, "Unrecognized setsockopt option %s\n", name); + exit(1); +} + +void xdisconnect(int fd, int addrlen) +{ + struct sockaddr_storage empty; + int msec_sleep = 10; + int queued = 1; + int i; + + shutdown(fd, SHUT_WR); + + /* while until the pending data is completely flushed, the later + * disconnect will bypass/ignore/drop any pending data. + */ + for (i = 0; ; i += msec_sleep) { + if (ioctl(fd, SIOCOUTQ, &queued) < 0) + xerror("can't query out socket queue: %d", errno); + + if (!queued) + break; + + if (i > poll_timeout) + xerror("timeout while waiting for spool to complete"); + usleep(msec_sleep * 1000); + } + + memset(&empty, 0, sizeof(empty)); + empty.ss_family = AF_UNSPEC; + if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) + xerror("can't disconnect: %d", errno); +} + int main_loop(void) { - int fd; + int fd, ret, fd_in = 0; + struct addrinfo *peer; /* listener is ready. 
*/ - fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto); + fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer); if (fd < 0) return 2; +again: check_getpeername_connect(fd); + SOCK_TEST_TCPULP(fd, cfg_sock_proto); + if (cfg_rcvbuf) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) @@ -962,7 +1145,31 @@ int main_loop(void) if (cfg_cmsg_types.cmsg_enabled) apply_cmsg_types(fd, &cfg_cmsg_types); - return copyfd_io(0, fd, 1); + if (cfg_input) { + fd_in = open(cfg_input, O_RDONLY); + if (fd_in < 0) /* validate the input file just opened, not the already-checked socket */ + xerror("can't open %s:%d", cfg_input, errno); + } + + /* close the client socket open only if we are not going to reconnect */ + ret = copyfd_io(fd_in, fd, 1, cfg_repeat == 1); + if (ret) + return ret; + + if (--cfg_repeat > 0) { + xdisconnect(fd, peer->ai_addrlen); + + /* the socket could be unblocking at this point, we need the + * connect to be blocking + */ + set_nonblock(fd, false); + if (connect(fd, peer->ai_addr, peer->ai_addrlen)) + xerror("can't reconnect: %d", errno); + if (cfg_input) + close(fd_in); + goto again; + } + return 0; } int parse_proto(const char *proto) @@ -1047,7 +1254,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) { + while ((c = getopt(argc, argv, "6c:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { switch (c) { case 'j': cfg_join = true; @@ -1061,6 +1268,12 @@ static void parse_opts(int argc, char **argv) if (cfg_do_w <= 0) cfg_do_w = 50; break; + case 'i': + cfg_input = optarg; + break; + case 'I': + cfg_repeat = atoi(optarg); + break; case 'l': listen_mode = true; break; @@ -1073,9 +1286,6 @@ static void parse_opts(int argc, char **argv) case 'h': die_usage(); break; - case 'u': - tcpulp_audit = true; - break; case '6': pf = AF_INET6; break; @@ -1108,6 +1318,9 @@ static void parse_opts(int argc, char **argv) case 'c': parse_cmsg_types(optarg); break; + case 'o': + parse_setsock_options(optarg); + break; } } @@ -1126,9 +1339,6 @@ int main(int argc, 
char *argv[]) signal(SIGUSR1, handle_signal); parse_opts(argc, argv); - if (tcpulp_audit) - return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1; - if (listen_mode) { int fd = sock_listen_mptcp(cfg_host, cfg_port); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 559173a8e387..cb5809b89081 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -7,6 +7,7 @@ optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" +cin_disconnect="" cin="" cout="" ksft_skip=4 @@ -24,6 +25,7 @@ options_log=true do_tcp=0 checksum=false filesize=0 +connect_per_transfer=1 if [ $tc_loss -eq 100 ];then tc_loss=1% @@ -127,6 +129,7 @@ TEST_COUNT=0 cleanup() { + rm -f "$cin_disconnect" "$cout_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" @@ -149,6 +152,8 @@ sout=$(mktemp) cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) +cin_disconnect="$cin".disconnect +cout_disconnect="$cout".disconnect trap cleanup EXIT for i in "$ns1" "$ns2" "$ns3" "$ns4";do @@ -296,24 +301,6 @@ check_mptcp_disabled() return 0 } -check_mptcp_ulp_setsockopt() -{ - local t retval - t="ns_ulp-$sech-$(mktemp -u XXXXXX)" - - ip netns add ${t} || exit $ksft_skip - if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then - printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n" - retval=1 - ret=$retval - else - printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) 
blocked\t[ OK ]\n" - retval=0 - fi - ip netns del ${t} - return $retval -} - # $1: IP address is_v6() { @@ -518,8 +505,8 @@ do_transfer() cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+1)) - expect_ackrx=$((stat_ackrx_last_l+1)) + expect_synrx=$((stat_synrx_last_l+$connect_per_transfer)) + expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer)) fi if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then @@ -671,6 +658,82 @@ run_tests() run_tests_lo $1 $2 $3 0 } +run_test_transparent() +{ + local connect_addr="$1" + local msg="$2" + + local connector_ns="$ns1" + local listener_ns="$ns2" + local lret=0 + local r6flag="" + + # skip if we don't want v6 + if ! $ipv6 && is_v6 "${connect_addr}"; then + return 0 + fi + +ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" +flush ruleset +table inet mangle { + chain divert { + type filter hook prerouting priority -150; + + meta l4proto tcp socket transparent 1 meta mark set 1 accept + tcp dport 20000 tproxy to :20000 meta mark set 1 accept + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: $msg, could not load nft ruleset" + return + fi + + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + r6flag="-6" + else + local_addr="0.0.0.0" + fi + + ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 + if [ $? -ne 0 ]; then + ip netns exec "$listener_ns" nft flush ruleset + echo "SKIP: $msg, ip $r6flag rule failed" + return + fi + + ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 + if [ $? -ne 0 ]; then + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + echo "SKIP: $msg, ip route add local $local_addr failed" + return + fi + + echo "INFO: test $msg" + + TEST_COUNT=10000 + local extra_args="-o TRANSPARENT" + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \ + ${connect_addr} ${local_addr} "${extra_args}" + lret=$? 
+ + ip netns exec "$listener_ns" nft flush ruleset + ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 + ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100 + + if [ $lret -ne 0 ]; then + echo "FAIL: $msg, mptcp connection error" 1>&2 + ret=$lret + return 1 + fi + + echo "PASS: $msg" + return 0 +} + run_tests_peekmode() { local peekmode="$1" @@ -680,6 +743,33 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +run_tests_disconnect() +{ + local peekmode="$1" + local old_cin=$cin + local old_sin=$sin + + cat $cin $cin $cin > "$cin".disconnect + + # force do_transfer to cope with the multiple transmissions + sin="$cin.disconnect" + sin_disconnect=$old_sin + cin="$cin.disconnect" + cin_disconnect="$old_cin" + connect_per_transfer=3 + + echo "INFO: disconnect" + run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin" + run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin" + + # restore previous status: only sin and cin were overridden above + sin=$old_sin + cout_disconnect="$cout".disconnect + cin=$old_cin + cin_disconnect="$cin".disconnect + connect_per_transfer=1 +} + display_time() { time_end=$(date +%s) @@ -704,8 +794,6 @@ make_file "$sin" "server" check_mptcp_disabled -check_mptcp_ulp_setsockopt - stop_if_error "The kernel configuration is not valid for MPTCP" echo "INFO: validating network environment with pings" @@ -794,5 +882,12 @@ run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" stop_if_error "Tests with peek mode have failed" +# connect to ns4 ip address, ns2 should intercept/proxy +run_test_transparent 10.0.3.1 "tproxy ipv4" +run_test_transparent dead:beef:3::1 "tproxy ipv6" +stop_if_error "Tests with tproxy have failed" + +run_tests_disconnect + display_time exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c new file mode 100644 index 000000000000..29f75e2a1116 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -0,0 
+1,602 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> +#include <time.h> + +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> +#include <linux/sockios.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif + +static int pf = AF_INET; +static int proto_tx = IPPROTO_MPTCP; +static int proto_rx = IPPROTO_MPTCP; + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n"); + exit(r); +} + +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + return gai_strerror(err); +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? 
service : "", errstr); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto_rx); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) + xerror("could not create listen socket"); + + if (listen(sock, 20)) + die_perror("listen"); + + return sock; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) + continue; + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + die_perror("connect"); + } + + if (sock < 0) + xerror("could not create connect socket"); + + freeaddrinfo(addr); + return sock; +} + +static int protostr_to_num(const char *s) +{ + if (strcasecmp(s, "tcp") == 0) + return IPPROTO_TCP; + if (strcasecmp(s, "mptcp") == 0) + return IPPROTO_MPTCP; + + die_usage(1); + return 0; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "h6t:r:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case '6': + pf = AF_INET6; + break; + 
case 't': + proto_tx = protostr_to_num(optarg); + break; + case 'r': + proto_rx = protostr_to_num(optarg); + break; + default: + die_usage(1); + break; + } + } +} + +/* wait up to timeout milliseconds */ +static void wait_for_ack(int fd, int timeout, size_t total) +{ + int i; + + for (i = 0; i < timeout; i++) { + int nsd, ret, queued = -1; + struct timespec req; + + ret = ioctl(fd, TIOCOUTQ, &queued); + if (ret < 0) + die_perror("TIOCOUTQ"); + + ret = ioctl(fd, SIOCOUTQNSD, &nsd); + if (ret < 0) + die_perror("SIOCOUTQNSD"); + + if ((size_t)queued > total) + xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total); + assert(nsd <= queued); + + if (queued == 0) + return; + + /* wait for peer to ack rx of all data */ + req.tv_sec = 0; + req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */ + nanosleep(&req, NULL); + } + + xerror("still tx data queued after %u ms\n", timeout); +} + +static void connect_one_server(int fd, int unixfd) +{ + size_t len, i, total, sent; + char buf[4096], buf2[4096]; + ssize_t ret; + + len = rand() % (sizeof(buf) - 1); + + if (len < 128) + len = 128; + + for (i = 0; i < len ; i++) { + buf[i] = rand() % 26; + buf[i] += 'A'; + } + + buf[i] = '\n'; + + /* un-block server */ + ret = read(unixfd, buf2, 4); + assert(ret == 4); + + assert(strncmp(buf2, "xmit", 4) == 0); + + ret = write(unixfd, &len, sizeof(len)); + assert(ret == (ssize_t)sizeof(len)); + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + + if (ret != (ssize_t)len) + xerror("short write"); + + ret = read(unixfd, buf2, 4); + assert(strncmp(buf2, "huge", 4) == 0); + + total = rand() % (16 * 1024 * 1024); + total += (1 * 1024 * 1024); + sent = total; + + ret = write(unixfd, &total, sizeof(total)); + assert(ret == (ssize_t)sizeof(total)); + + wait_for_ack(fd, 5000, len); + + while (total > 0) { + if (total > sizeof(buf)) + len = sizeof(buf); + else + len = total; + + ret = write(fd, buf, len); + if (ret < 0) + die_perror("write"); + total -= ret; + + /* we don't have to care 
about buf content, only + * number of total bytes sent + */ + } + + ret = read(unixfd, buf2, 4); + assert(ret == 4); + assert(strncmp(buf2, "shut", 4) == 0); + + wait_for_ack(fd, 5000, sent); + + ret = write(fd, buf, 1); + assert(ret == 1); + close(fd); + ret = write(unixfd, "closed", 6); + assert(ret == 6); + + close(unixfd); +} + +static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv) +{ + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { + memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv)); + return; + } + } + + xerror("could not find TCP_CM_INQ cmsg type"); +} + +static void process_one_client(int fd, int unixfd) +{ + unsigned int tcp_inq; + size_t expect_len; + char msg_buf[4096]; + char buf[4096]; + char tmp[16]; + struct iovec iov = { + .iov_base = buf, + .iov_len = 1, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + ssize_t ret, tot; + + ret = write(unixfd, "xmit", 4); + assert(ret == 4); + + ret = read(unixfd, &expect_len, sizeof(expect_len)); + assert(ret == (ssize_t)sizeof(expect_len)); + + if (expect_len > sizeof(buf)) + xerror("expect len %zu exceeds buffer size", expect_len); + + for (;;) { + struct timespec req; + unsigned int queued; + + ret = ioctl(fd, FIONREAD, &queued); + if (ret < 0) + die_perror("FIONREAD"); + if (queued > expect_len) + xerror("FIONREAD returned %u, but only %zu expected\n", + queued, expect_len); + if (queued == expect_len) + break; + + req.tv_sec = 0; + req.tv_nsec = 1000 * 1000ul; + nanosleep(&req, NULL); + } + + /* read one byte, expect cmsg to return expected - 1 */ + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + if (msg.msg_controllen == 0) + xerror("msg_controllen is 0"); + + get_tcp_inq(&msg, &tcp_inq); + + assert((size_t)tcp_inq == (expect_len - 1)); + + iov.iov_len = sizeof(buf); 
+ ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + /* should have gotten exact remainder of all pending data */ + assert(ret == (ssize_t)tcp_inq); + + /* should be 0, all drained */ + get_tcp_inq(&msg, &tcp_inq); + assert(tcp_inq == 0); + + /* request a large swath of data. */ + ret = write(unixfd, "huge", 4); + assert(ret == 4); + + ret = read(unixfd, &expect_len, sizeof(expect_len)); + assert(ret == (ssize_t)sizeof(expect_len)); + + /* peer should send us a few mb of data */ + if (expect_len <= sizeof(buf)) + xerror("expect len %zu too small\n", expect_len); + + tot = 0; + do { + iov.iov_len = sizeof(buf); + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + tot += ret; + + get_tcp_inq(&msg, &tcp_inq); + + if (tcp_inq > expect_len - tot) + xerror("inq %d, remaining %d total_len %d\n", + tcp_inq, expect_len - tot, (int)expect_len); + + assert(tcp_inq <= expect_len - tot); + } while ((size_t)tot < expect_len); + + ret = write(unixfd, "shut", 4); + assert(ret == 4); + + /* wait for hangup. Should have received one more byte of data. */ + ret = read(unixfd, tmp, sizeof(tmp)); + assert(ret == 6); + assert(strncmp(tmp, "closed", 6) == 0); + + sleep(1); + + iov.iov_len = 1; + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + assert(ret == 1); + + get_tcp_inq(&msg, &tcp_inq); + + /* tcp_inq should be 1 due to received fin. 
*/ + assert(tcp_inq == 1); + + iov.iov_len = 1; + ret = recvmsg(fd, &msg, 0); + if (ret < 0) + die_perror("recvmsg"); + + /* expect EOF */ + assert(ret == 0); + get_tcp_inq(&msg, &tcp_inq); + assert(tcp_inq == 1); + + close(fd); +} + +static int xaccept(int s) +{ + int fd = accept(s, NULL, 0); + + if (fd < 0) + die_perror("accept"); + + return fd; +} + +static int server(int unixfd) +{ + int fd = -1, r, on = 1; + + switch (pf) { + case AF_INET: + fd = sock_listen_mptcp("127.0.0.1", "15432"); + break; + case AF_INET6: + fd = sock_listen_mptcp("::1", "15432"); + break; + default: + xerror("Unknown pf %d\n", pf); + break; + } + + r = write(unixfd, "conn", 4); + assert(r == 4); + + alarm(15); + r = xaccept(fd); + + if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on))) + die_perror("setsockopt"); + + process_one_client(r, unixfd); + + return 0; +} + +static int client(int unixfd) +{ + int fd = -1; + + alarm(15); + + switch (pf) { + case AF_INET: + fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx); + break; + case AF_INET6: + fd = sock_connect_mptcp("::1", "15432", proto_tx); + break; + default: + xerror("Unknown pf %d\n", pf); + } + + connect_one_server(fd, unixfd); + + return 0; +} + +static void init_rng(void) +{ + int fd = open("/dev/urandom", O_RDONLY); + unsigned int foo = (unsigned int)time(NULL); /* defined fallback seed when /dev/urandom cannot be opened or read */ + + if (fd > 0) { + int ret = read(fd, &foo, sizeof(foo)); + + if (ret < 0) + srand(fd + foo); + close(fd); + } + + srand(foo); +} + +static pid_t xfork(void) +{ + pid_t p = fork(); + + if (p < 0) + die_perror("fork"); + else if (p == 0) + init_rng(); + + return p; +} + +static int rcheck(int wstatus, const char *what) +{ + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == 0) + return 0; + fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus)); + return WEXITSTATUS(wstatus); + } else if (WIFSIGNALED(wstatus)) { + xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus)); + } else if (WIFSTOPPED(wstatus)) { + xerror("%s stopped by signal %d\n", what, 
WSTOPSIG(wstatus)); + } + + return 111; +} + +int main(int argc, char *argv[]) +{ + int e1, e2, wstatus; + pid_t s, c, ret; + int unixfds[2]; + + parse_opts(argc, argv); + + e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds); + if (e1 < 0) + die_perror("pipe"); + + s = xfork(); + if (s == 0) + return server(unixfds[1]); + + close(unixfds[1]); + + /* wait until server bound a socket */ + e1 = read(unixfds[0], &e1, 4); + assert(e1 == 4); + + c = xfork(); + if (c == 0) + return client(unixfds[0]); + + close(unixfds[0]); + + ret = waitpid(s, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e1 = rcheck(wstatus, "server"); + ret = waitpid(c, &wstatus, 0); + if (ret == -1) + die_perror("waitpid"); + e2 = rcheck(wstatus, "client"); + + return e1 ? e1 : e2; +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7ef639a9d4a6..27d0eb9afdca 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -238,6 +238,45 @@ is_v6() [ -z "${1##*:*}" ] } +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +rm_addr_count() +{ + ns=${1} + + ip netns exec ${ns} nstat -as | grep MPTcpExtRmAddr | awk '{print $2}' +} + +# $1: ns, $2: old rm_addr counter in $ns +wait_rm_addr() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + local i + + for i in $(seq 10); do + cnt=$(rm_addr_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + do_transfer() { listener_ns="$1" @@ -307,7 +346,7 @@ do_transfer() fi spid=$! 
- sleep 1 + wait_local_port_listen "${listener_ns}" "${port}" if [ "$test_link_fail" -eq 0 ];then timeout ${timeout_test} \ @@ -324,10 +363,13 @@ do_transfer() fi cpid=$! + # let the mptcp subflow be established in background before + # do endpoint manipulation + [ $addr_nr_ns1 = "0" -a $addr_nr_ns2 = "0" ] || sleep 1 + if [ $addr_nr_ns1 -gt 0 ]; then let add_nr_ns1=addr_nr_ns1 counter=2 - sleep 1 while [ $add_nr_ns1 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -339,7 +381,6 @@ do_transfer() let counter+=1 let add_nr_ns1-=1 done - sleep 1 elif [ $addr_nr_ns1 -lt 0 ]; then let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then @@ -347,22 +388,19 @@ do_transfer() pos=1 dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then - sleep 1 - while [ $counter -le $rm_nr_ns1 ] do id=${dump[$pos]} + rm_addr=$(rm_addr_count ${connector_ns}) ip netns exec ${listener_ns} ./pm_nl_ctl del $id - sleep 1 + wait_rm_addr ${connector_ns} ${rm_addr} let counter+=1 let pos+=5 done fi elif [ $rm_nr_ns1 -eq 8 ]; then - sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl flush elif [ $rm_nr_ns1 -eq 9 ]; then - sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr} fi fi @@ -373,10 +411,13 @@ do_transfer() addr_nr_ns2=${addr_nr_ns2:9} fi + # if newly added endpoints must be deleted, give the background msk + # some time to created them + [ $addr_nr_ns1 -gt 0 -a $addr_nr_ns2 -lt 0 ] && sleep 1 + if [ $addr_nr_ns2 -gt 0 ]; then let add_nr_ns2=addr_nr_ns2 counter=3 - sleep 1 while [ $add_nr_ns2 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -388,7 +429,6 @@ do_transfer() let counter+=1 let add_nr_ns2-=1 done - sleep 1 elif [ $addr_nr_ns2 -lt 0 ]; then let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then @@ -396,19 +436,18 @@ do_transfer() pos=1 dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) if [ ${#dump[@]} -gt 0 ]; then - sleep 1 - while [ $counter -le $rm_nr_ns2 ] do + # rm_addr are serialized, allow the 
previous one to complete id=${dump[$pos]} + rm_addr=$(rm_addr_count ${listener_ns}) ip netns exec ${connector_ns} ./pm_nl_ctl del $id - sleep 1 + wait_rm_addr ${listener_ns} ${rm_addr} let counter+=1 let pos+=5 done fi elif [ $rm_nr_ns2 -eq 8 ]; then - sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl flush elif [ $rm_nr_ns2 -eq 9 ]; then local addr @@ -417,7 +456,6 @@ do_transfer() else addr="10.0.1.2" fi - sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr fi fi @@ -539,6 +577,14 @@ run_tests() lret=$? } +dump_stats() +{ + echo Server ns stats + ip netns exec $ns1 nstat -as | grep Tcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep Tcp +} + chk_csum_nr() { local msg=${1:-""} @@ -570,12 +616,7 @@ chk_csum_nr() else echo "[ ok ]" fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi + [ "${dump_stats}" = 1 ] && dump_stats } chk_fail_nr() @@ -607,12 +648,7 @@ chk_fail_nr() echo "[ ok ]" fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi + [ "${dump_stats}" = 1 ] && dump_stats } chk_join_nr() @@ -656,12 +692,7 @@ chk_join_nr() else echo "[ ok ]" fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi + [ "${dump_stats}" = 1 ] && dump_stats if [ $checksum -eq 1 ]; then chk_csum_nr chk_fail_nr 0 0 @@ -823,12 +854,7 @@ chk_add_nr() echo "" fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi + [ "${dump_stats}" = 1 ] && dump_stats } chk_rm_nr() @@ -871,12 +897,7 @@ chk_rm_nr() echo "[ ok ]" fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns 
exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi + [ "${dump_stats}" = 1 ] && dump_stats } chk_prio_nr() @@ -908,12 +929,7 @@ chk_prio_nr() echo "[ ok ]" fi - if [ "${dump_stats}" = 1 ]; then - echo Server ns stats - ip netns exec $ns1 nstat -as | grep MPTcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep MPTcp - fi + [ "${dump_stats}" = 1 ] && dump_stats } chk_link_usage() @@ -937,6 +953,22 @@ chk_link_usage() fi } +wait_for_tw() +{ + local timeout_ms=$((timeout_poll * 1000)) + local time=0 + local ns=$1 + + while [ $time -lt $timeout_ms ]; do + local cnt=$(ip netns exec $ns ss -t state time-wait |wc -l) + + [ "$cnt" = 1 ] && return 1 + time=$((time + 100)) + sleep 0.1 + done + return 1 +} + subflows_tests() { reset @@ -994,6 +1026,61 @@ subflows_tests() chk_join_nr "single subflow, dev" 1 1 1 } +subflows_error_tests() +{ + # If a single subflow is configured, and matches the MPC src + # address, no additional subflow should be created + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr "no MPC reuse with single endpoint" 0 0 0 + + # multiple subflows, with subflow creation error + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr "multi subflows, with failing subflow" 1 1 1 + + # multiple subflows, with subflow timeout on MPJ + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl limits 0 2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + ip netns exec 
$ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j DROP + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr "multi subflows, with subflow timeout" 1 1 1 + + # multiple subflows, check that the endpoint corresponding to + # closed subflow (due to reset) is not reused if additional + # subflows are added later + reset + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns1 iptables -A INPUT -s 10.0.3.2 -p tcp -j REJECT + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & + + # updates in the child shell do not have any effect here, we + # need to bump the test counter for the above case + TEST_COUNT=$((TEST_COUNT+1)) + + # mpj subflow will be in TW after the reset + wait_for_tw $ns2 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow + wait + + # additional subflow could be created only if the PM select + # the later endpoint, skipping the already used one + chk_join_nr "multi subflows, fair usage on close" 1 1 1 +} + signal_address_tests() { # add_address, unused @@ -1071,7 +1158,10 @@ signal_address_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal run_tests $ns1 $ns2 10.0.1.1 - chk_add_nr 4 4 + + # the server will not signal the address terminating + # the MPC subflow + chk_add_nr 3 3 } link_failure_tests() @@ -1577,7 +1667,7 @@ add_addr_ports_tests() ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow chk_join_nr "flush subflows and signal with port" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -1802,6 +1892,7 @@ fullmesh_tests() all_tests() { subflows_tests + subflows_error_tests signal_address_tests link_failure_tests add_addr_timeout_tests @@ -1821,6 +1912,7 @@ usage() { echo "mptcp_join usage:" 
echo " -f subflows_tests" + echo " -e subflows_error_tests" echo " -s signal_address_tests" echo " -l link_failure_tests" echo " -t add_addr_timeout_tests" @@ -1869,11 +1961,14 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkdmchCS' opt; do +while getopts 'fesltra64bpkdmchCS' opt; do case $opt in f) subflows_tests ;; + e) + subflows_error_tests + ;; s) signal_address_tests ;; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 417b11cafafe..ac9a4d9c1764 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -4,6 +4,7 @@ #include <assert.h> #include <errno.h> +#include <fcntl.h> #include <limits.h> #include <string.h> #include <stdarg.h> @@ -13,6 +14,7 @@ #include <stdio.h> #include <stdlib.h> #include <strings.h> +#include <time.h> #include <unistd.h> #include <sys/socket.h> @@ -594,6 +596,44 @@ static int server(int pipefd) return 0; } +static void test_ip_tos_sockopt(int fd) +{ + uint8_t tos_in, tos_out; + socklen_t s; + int r; + + tos_in = rand() & 0xfc; + r = setsockopt(fd, SOL_IP, IP_TOS, &tos_in, sizeof(tos_out)); + if (r != 0) + die_perror("setsockopt IP_TOS"); + + tos_out = 0; + s = sizeof(tos_out); + r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s); + if (r != 0) + die_perror("getsockopt IP_TOS"); + + if (tos_in != tos_out) + xerror("tos %x != %x socklen_t %d\n", tos_in, tos_out, s); + + if (s != 1) + xerror("tos should be 1 byte"); + + s = 0; + r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s); + if (r != 0) + die_perror("getsockopt IP_TOS 0"); + if (s != 0) + xerror("expect socklen_t == 0"); + + s = -1; + r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s); + if (r != -1 && errno != EINVAL) + die_perror("getsockopt IP_TOS did not indicate -EINVAL"); + if (s != -1) + xerror("expect socklen_t == -1"); +} + static int client(int pipefd) { int fd = -1; @@ -611,6 +651,8 @@ static int client(int pipefd) 
xerror("Unknown pf %d\n", pf); } + test_ip_tos_sockopt(fd); + connect_one_server(fd, pipefd); return 0; @@ -642,6 +684,25 @@ static int rcheck(int wstatus, const char *what) return 111; } +static void init_rng(void) +{ + int fd = open("/dev/urandom", O_RDONLY); + + if (fd >= 0) { + unsigned int foo; + ssize_t ret; + + /* can't fail */ + ret = read(fd, &foo, sizeof(foo)); + assert(ret == sizeof(foo)); + + close(fd); + srand(foo); + } else { + srand(time(NULL)); + } +} + int main(int argc, char *argv[]) { int e1, e2, wstatus; @@ -650,6 +711,8 @@ int main(int argc, char *argv[]) parse_opts(argc, argv); + init_rng(); + e1 = pipe(pipefds); if (e1 < 0) die_perror("pipe"); diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 41de643788b8..0879da915014 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -178,7 +178,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS,TCPINQ \ ${local_addr} < "$sin" > "$sout" & spid=$! @@ -186,7 +186,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS,TCPINQ \ $connect_addr < "$cin" > "$cout" & cpid=$! @@ -279,6 +279,45 @@ run_tests() fi } +do_tcpinq_test() +{ + ip netns exec "$ns1" ./mptcp_inq "$@" + lret=$? 
+ if [ $lret -ne 0 ];then + ret=$lret + echo "FAIL: mptcp_inq $@" 1>&2 + return $lret + fi + + echo "PASS: TCP_INQ cmsg/ioctl $@" + return $lret +} + +do_tcpinq_tests() +{ + local lret=0 + + ip netns exec "$ns1" iptables -F + ip netns exec "$ns1" ip6tables -F + + for args in "-t tcp" "-r tcp"; do + do_tcpinq_test $args + lret=$? + if [ $lret -ne 0 ] ; then + return $lret + fi + do_tcpinq_test -6 $args + lret=$? + if [ $lret -ne 0 ] ; then + return $lret + fi + done + + do_tcpinq_test -r tcp -t tcp + + return $? +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -300,4 +339,5 @@ if [ $ret -eq 0 ];then echo "PASS: SOL_MPTCP getsockopt has expected information" fi +do_tcpinq_tests exit $ret diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index b599003eb5ba..d9a6fd2cd9d3 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -85,6 +85,7 @@ struct sock_args { int version; /* AF_INET/AF_INET6 */ int use_setsockopt; + int use_freebind; int use_cmsg; const char *dev; const char *server_dev; @@ -514,6 +515,29 @@ static int set_membership(int sd, uint32_t grp, uint32_t addr, int ifindex) return 0; } +static int set_freebind(int sd, int version) +{ + unsigned int one = 1; + int rc = 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_FREEBIND, &one, sizeof(one))) { + log_err_errno("setsockopt(IP_FREEBIND)"); + rc = -1; + } + break; + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { + log_err_errno("setsockopt(IPV6_FREEBIND"); + rc = -1; + } + break; + } + + return rc; +} + static int set_broadcast(int sd) { unsigned int one = 1; @@ -1419,6 +1443,9 @@ static int lsock_init(struct sock_args *args) set_unicast_if(sd, args->ifindex, args->version)) goto err; + if (args->use_freebind && set_freebind(sd, args->version)) + goto err; + if (bind_socket(sd, args)) goto err; @@ -1827,7 +1854,7 @@ static int ipc_parent(int cpid, int fd, struct 
sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbqf" #define OPT_FORCE_BIND_KEY_IFINDEX 1001 #define OPT_NO_BIND_KEY_IFINDEX 1002 @@ -1864,6 +1891,7 @@ static void print_usage(char *prog) " -I dev bind socket to given device name - server mode\n" " -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n" " to set device binding\n" + " -f bind socket with the IP[V6]_FREEBIND option\n" " -C use cmsg and IP_PKTINFO to specify device binding\n" "\n" " -L len send random message of given length\n" @@ -1999,6 +2027,9 @@ int main(int argc, char *argv[]) case 'S': args.use_setsockopt = 1; break; + case 'f': + args.use_freebind = 1; + break; case 'C': args.use_cmsg = 1; break; diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index b5277106df1f..072d709c96b4 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -24,9 +24,7 @@ #include <sys/resource.h> #include <unistd.h> -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#endif +#include "../kselftest.h" struct test_params { int recv_family; diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index e4613ce4ed69..9eb42570294d 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -18,7 +18,7 @@ #include <linux/net_tstamp.h> #include <linux/errqueue.h> -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#include "../kselftest.h" struct options { int so_timestamp; diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index 694d70710ff0..dfc27cdc6c05 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1 @@ -timeout=300 +timeout=1500 diff --git 
a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index afca1ead677f..db1aeb8c5d1e 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -7,6 +7,8 @@ #include <sys/socket.h> #include <netinet/in.h> +#include "../kselftest.h" + struct socket_testcase { int domain; int type; @@ -31,7 +33,6 @@ static struct socket_testcase tests[] = { { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 }, }; -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #define ERR_STRING_SZ 64 static int run_tests(void) diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c index 9c55ec44fc43..c1cb0c75156a 100644 --- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c @@ -26,6 +26,8 @@ #include <fcntl.h> #include <time.h> +#include "../kselftest.h" + #ifndef TCP_FASTOPEN_KEY #define TCP_FASTOPEN_KEY 33 #endif @@ -34,10 +36,6 @@ #define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key" #define KEY_LENGTH 16 -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) -#endif - static bool do_ipv6; static bool do_sockopt; static bool do_rotate; diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh index 4a6f5c3b3215..eb9553e4986b 100755 --- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh +++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh @@ -41,7 +41,7 @@ checktool (){ checktool "iptables --version" "run test without iptables" checktool "ip -Version" "run test without ip tool" -checktool "which nc" "run test without nc (netcat)" +checktool "which socat" "run test without socat" checktool "ip netns add ${r_a}" "create net namespace" for n in ${r_b} ${r_w} ${c_a} ${c_b};do @@ -60,11 +60,12 @@ trap cleanup EXIT test_path() { msg="$1" - ip netns exec ${c_b} nc -n -w 3 -q 3 -u 
-l -p 5000 > ${rx} < /dev/null & + ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null & sleep 1 for i in 1 2 3; do - head -c1400 /dev/zero | tr "\000" "a" | ip netns exec ${c_a} nc -n -w 1 -u 192.168.20.2 5000 + head -c1400 /dev/zero | tr "\000" "a" | \ + ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000 done wait @@ -189,7 +190,7 @@ ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null #--------------------- #Now we send a 1400 bytes UDP packet from Client A to Client B: -# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000 +# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000 test_path "without" # The IPv4 stack on Client A already knows the PMTU to Client B, so the diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh index cfee3b65be0f..1092bbcb1fba 100755 --- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh @@ -76,23 +76,23 @@ ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1 sleep 1 # add a persistent connection from the other namespace -ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null & +ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & sleep 1 # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because # 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use -echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null +echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443 >/dev/null ret=$? kill $iperfs -# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). 
if [ $ret -eq 0 ]; then - echo "PASS: nc can connect via NAT'd address" + echo "PASS: socat can connect via NAT'd address" else - echo "FAIL: nc cannot connect via NAT'd address" + echo "FAIL: socat cannot connect via NAT'd address" exit 1 fi diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d88867d2fed7..349a319a9e51 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -880,8 +880,9 @@ EOF return $ksft_skip fi - # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. - test_port_shadow "default" "CLIENT" + # test default behaviour. Packet from ns1 to ns0 is not redirected + # due to automatic port translation. + test_port_shadow "default" "ROUTER" # test packet filter based mitigation: prevent forwarding of # packets claiming to come from the service port. diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index b0b20e0b4e30..f43aa4b77fba 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -44,7 +44,10 @@ mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush for m in $mitigations do - do_one "$m" & + if [[ -f /sys/kernel/debug/powerpc/$m ]] + then + do_one "$m" & + fi done echo "Spawned threads enabling/disabling mitigations ..." diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index adc2b7294e5f..83647b8277e7 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -193,7 +193,7 @@ int spectre_v2_test(void) * We are not vulnerable and reporting otherwise, so * missing such a mismatch is safe. 
*/ - if (state == VULNERABLE) + if (miss_percent > 95) return 4; return 1; diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore index ce3375cd8e73..9d0915777fed 100644 --- a/tools/testing/selftests/powerpc/signal/.gitignore +++ b/tools/testing/selftests/powerpc/signal/.gitignore @@ -4,3 +4,5 @@ signal_tm sigfuz sigreturn_vdso sig_sc_double_restart +sigreturn_kernel +sigreturn_unaligned diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index d6ae54663aed..f679d260afc8 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart +TEST_GEN_PROGS += sigreturn_kernel +TEST_GEN_PROGS += sigreturn_unaligned CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c new file mode 100644 index 000000000000..0a1b6e591eee --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that we can't sigreturn to kernel addresses, or to kernel mode. 
+ */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <signal.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +#define MSR_PR (1ul << 14) + +static volatile unsigned long long sigreturn_addr; +static volatile unsigned long long sigreturn_msr_mask; + +static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr) +{ + ucontext_t *uc = (ucontext_t *)uc_ptr; + + if (sigreturn_addr) + UCONTEXT_NIA(uc) = sigreturn_addr; + + if (sigreturn_msr_mask) + UCONTEXT_MSR(uc) &= sigreturn_msr_mask; +} + +static pid_t fork_child(void) +{ + pid_t pid; + + pid = fork(); + if (pid == 0) { + raise(SIGUSR1); + exit(0); + } + + return pid; +} + +static int expect_segv(pid_t pid) +{ + int child_ret; + + waitpid(pid, &child_ret, 0); + FAIL_IF(WIFEXITED(child_ret)); + FAIL_IF(!WIFSIGNALED(child_ret)); + FAIL_IF(WTERMSIG(child_ret) != 11); + + return 0; +} + +int test_sigreturn_kernel(void) +{ + struct sigaction act; + int child_ret, i; + pid_t pid; + + act.sa_sigaction = sigusr1_handler; + act.sa_flags = SA_SIGINFO; + sigemptyset(&act.sa_mask); + + FAIL_IF(sigaction(SIGUSR1, &act, NULL)); + + for (i = 0; i < 2; i++) { + // Return to kernel + sigreturn_addr = 0xcull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to kernel virtual + sigreturn_addr = 0xc008ull << 48; + pid = fork_child(); + expect_segv(pid); + + // Return out of range + sigreturn_addr = 0xc010ull << 48; + pid = fork_child(); + expect_segv(pid); + + // Return to no-man's land, just below PAGE_OFFSET + sigreturn_addr = (0xcull << 60) - (64 * 1024); + pid = fork_child(); + expect_segv(pid); + + // Return to no-man's land, above TASK_SIZE_4PB + sigreturn_addr = 0x1ull << 52; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xd space + sigreturn_addr = 0xdull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xe space + sigreturn_addr = 0xeull << 60; + pid = fork_child(); + expect_segv(pid); + + // Return to 0xf 
space + sigreturn_addr = 0xfull << 60; + pid = fork_child(); + expect_segv(pid); + + // Attempt to set PR=0 for 2nd loop (should be blocked by kernel) + sigreturn_msr_mask = ~MSR_PR; + } + + printf("All children killed as expected\n"); + + // Don't change address, just MSR, should return to user as normal + sigreturn_addr = 0; + sigreturn_msr_mask = ~MSR_PR; + pid = fork_child(); + waitpid(pid, &child_ret, 0); + FAIL_IF(!WIFEXITED(child_ret)); + FAIL_IF(WIFSIGNALED(child_ret)); + FAIL_IF(WEXITSTATUS(child_ret) != 0); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_kernel, "sigreturn_kernel"); +} diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c new file mode 100644 index 000000000000..6e58ee4f0fdf --- /dev/null +++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test sigreturn to an unaligned address, ie. low 2 bits set. + * Nothing bad should happen. + * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y. 
+ */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> + +#include "utils.h" + + +static void sigusr1_handler(int signo, siginfo_t *info, void *ptr) +{ + ucontext_t *uc = ptr; + + UCONTEXT_NIA(uc) |= 3; +} + +static int test_sigreturn_unaligned(void) +{ + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_sigaction = sigusr1_handler; + action.sa_flags = SA_SIGINFO; + + FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1); + + raise(SIGUSR1); + + return 0; +} + +int main(void) +{ + return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned"); +} diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index f7911aaeb007..c0f6a062364d 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -354,6 +354,18 @@ int main(int argc, char *argv[]) } } + if (pin_index >= 0) { + memset(&desc, 0, sizeof(desc)); + desc.index = pin_index; + desc.func = pin_func; + desc.chan = index; + if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) { + perror("PTP_PIN_SETFUNC"); + } else { + puts("set pin function okay"); + } + } + if (extts) { memset(&extts_request, 0, sizeof(extts_request)); extts_request.index = index; @@ -444,18 +456,6 @@ int main(int argc, char *argv[]) } } - if (pin_index >= 0) { - memset(&desc, 0, sizeof(desc)); - desc.index = pin_index; - desc.func = pin_func; - desc.chan = index; - if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) { - perror("PTP_PIN_SETFUNC"); - } else { - puts("set pin function okay"); - } - } - if (pps != -1) { int enable = pps ? 
1 : 0; if (ioctl(fd, PTP_ENABLE_PPS, enable)) { diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh index daf64b507038..2e9e9e2eedb6 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -30,9 +30,9 @@ editor=${EDITOR-vi} files= for i in ${rundir}/*/Make.out do - if egrep -q "error:|warning:" < $i + if egrep -q "error:|warning:|^ld: .*undefined reference to" < $i then - egrep "error:|warning:" < $i > $i.diags + egrep "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags files="$files $i.diags $i" fi done diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index fbdf162b6acd..1c4c2c727dad 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -25,7 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^\[[ 0-9.]*] //' | awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | tr -d '\012\015'`" -fwdprog="`grep 'rcu_torture_fwd_prog_cr Duration' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k15nr | head -1 | awk '{ print $14 " " $15 }'`" +fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }'`" if test -z "$ngps" then echo "$configfile ------- " $stopstate diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index c7d42ef80c53..e09b1bc78708 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -144,7 +144,7 @@ do if test "$ret" -ne 0 then echo System $i unreachable, giving up. 
| tee -a "$oldrun/remote-log" - exit 4 | tee -a "$oldrun/remote-log" + exit 4 fi done @@ -157,8 +157,15 @@ do ret=$? if test "$ret" -ne 0 then - echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" - exit 10 | tee -a "$oldrun/remote-log" + echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. | tee -a "$oldrun/remote-log" + sleep 60 + cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -" + ret=$? + if test "$ret" -ne 0 + then + echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log" + exit 10 + fi fi done @@ -177,16 +184,16 @@ checkremotefile () { ret=$? if test "$ret" -eq 255 then - echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` + echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" elif test "$ret" -eq 0 then return 0 elif test "$ret" -eq 1 then - echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" + echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log" return 1 else - echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` + echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log" return $ret fi sleep $sleeptime @@ -245,7 +252,7 @@ do sleep 30 fi done -echo All batches started. `date` +echo All batches started. `date` | tee -a "$oldrun/remote-log" # Wait for all remaining scenarios to complete and collect results. 
for i in $systems @@ -254,7 +261,7 @@ do do sleep 30 done - echo " ---" Collecting results from $i `date` + echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log" ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - ) done diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 6cf9ec6a3d1c..6de0c183db5b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -74,7 +74,9 @@ usage () { echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" + echo " --kasan" echo " --kconfig Kconfig-options" + echo " --kcsan" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --memory megabytes|nnnG" @@ -83,6 +85,7 @@ usage () { echo " --qemu-cmd qemu-system-..." echo " --remote" echo " --results absolute-pathname" + echo " --shutdown-grace seconds" echo " --torture lock|rcu|rcuscale|refscale|scf" echo " --trust-make" exit 1 @@ -175,14 +178,14 @@ do jitter="$2" shift ;; + --kasan) + TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + ;; --kconfig|--kconfigs) checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; - --kasan) - TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG - ;; --kcsan) TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; diff --git 
a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 9313e5065ae9..2dbfca3589b1 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -39,7 +39,8 @@ fi grep warning: < $F > $T/warnings grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings -cat $T/hwarnings $T/cwarnings > $T/rcuwarnings +grep "^ld: .*undefined reference to" $T/warnings | head -1 > $T/ldwarnings +cat $T/hwarnings $T/cwarnings $T/ldwarnings > $T/rcuwarnings if test -s $T/rcuwarnings then print_warning $title build errors: diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T index d6557c38dfe4..c70cf0405f24 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_SRCU=y CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U index 6bc24e99862f..bc9eeabaa1b1 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_SRCU=y CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot index 22cdeced98ea..30ca5b493c4b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot @@ -1,2 +1,3 @@ rcutorture.torture_type=tasks 
rcutree.use_softirq=0 +rcupdate.rcu_task_enqueue_lim=4 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 index 6db705e55487..0953c52fcfd7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index d8674264318d..30439f6fc20e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -2,6 +2,7 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=y CONFIG_NO_HZ_IDLE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot index 9675ad632dcc..ba6d636a4856 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks-tracing +rcupdate.rcu_task_enqueue_lim=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot index 9675ad632dcc..c70b5db6c2ae 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot @@ -1 +1,2 @@ rcutorture.torture_type=tasks-tracing +rcutorture.fwd_progress=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index b5b53973c01e..8ae41d5f81a3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ 
b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_MAXSMP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 65daee4fbf5a..2871ee599891 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot new file mode 100644 index 000000000000..dd914fa8f690 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot @@ -0,0 +1 @@ +rcutorture.fwd_progress=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index f6d6a40c0576..22ad0261728d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 4f95f8544f3f..9f48c73709ec 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 
b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index bf4980d606b5..db27651de04b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index d7afb271a586..2789b47e4ecd 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index c810c5276a89..8b561355b9ef 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index 7311f84a5876..4a00539bfdd7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -7,7 +7,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot new file mode 100644 index 000000000000..dd914fa8f690 --- /dev/null 
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot @@ -0,0 +1 @@ +rcutorture.fwd_progress=2 diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index fb05ef5279b4..0fa2dc086e10 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY @@ -2,11 +2,11 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_TINY_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 index e6baa2fbaeb3..227aba7783af 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 @@ -5,7 +5,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE index 4cc1cc581321..f110d9ffbe4c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 index f5952061fde7..9f83e5372796 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 @@ -7,7 +7,6 @@ 
CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index ad505a887bec..7f06838a91e6 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -6,7 +6,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT index 4f08e641bb6b..52e3ef674056 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT @@ -6,7 +6,6 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n CONFIG_HOTPLUG_CPU=y CONFIG_SUSPEND=n CONFIG_HIBERNATION=n diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index 1b96d68473b8..42acb1a64ce1 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -15,7 +15,6 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs. CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with rcu_nocbs=0, and one with all rcu_nocbs CPUs. CONFIG_RCU_TRACE -- Do half. 
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index eb3f6db36d36..b953a52ff706 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -9,10 +9,9 @@ #include <string.h> #include <stddef.h> +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - struct percpu_lock_entry { intptr_t v; } __attribute__((aligned(128))); diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 7159eb777fd3..fb440dfca158 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -27,10 +27,9 @@ #include <signal.h> #include <limits.h> +#include "../kselftest.h" #include "rseq.h" -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - __thread volatile struct rseq __rseq_abi = { .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, }; diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 6e5102a7d7c9..5b5c9d558dee 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -18,7 +18,7 @@ #include <sys/syscall.h> #include <sys/types.h> -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#include "../kselftest.h" unsigned long long timing(clockid_t clk_id, unsigned long long samples) { diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index d425688cf59c..9d126d7fabdb 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1487,7 +1487,7 @@ TEST_F(precedence, log_is_fifth_in_any_order) #define PTRACE_EVENT_SECCOMP 7 #endif -#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) +#define PTRACE_EVENT_MASK(status) ((status) >> 16) bool tracer_running; void tracer_stop(int sig) { 
@@ -1539,12 +1539,22 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, if (wait(&status) != tracee) continue; - if (WIFSIGNALED(status) || WIFEXITED(status)) - /* Child is dead. Time to go. */ + + if (WIFSIGNALED(status)) { + /* Child caught a fatal signal. */ + return; + } + if (WIFEXITED(status)) { + /* Child exited with code. */ return; + } - /* Check if this is a seccomp event. */ - ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status)); + /* Check if we got an expected event. */ + ASSERT_EQ(WIFCONTINUED(status), false); + ASSERT_EQ(WIFSTOPPED(status), true); + ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) { + TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); + } tracer_func(_metadata, tracee, status, args); @@ -1961,6 +1971,11 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, int ret; unsigned long msg; + EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) { + TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status)); + return; + } + /* Make sure we got the right message. */ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); EXPECT_EQ(0, ret); @@ -2011,6 +2026,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, long *syscall_nr = NULL, *syscall_ret = NULL; FIXTURE_DATA(TRACE_syscall) *self = args; + EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) { + TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status)); + return; + } + /* * The traditional way to tell PTRACE_SYSCALL entry/exit * is by counting. @@ -2128,6 +2148,7 @@ FIXTURE_SETUP(TRACE_syscall) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); + /* Do not install seccomp rewrite filters, as we'll use ptrace instead. 
*/ if (variant->use_ptrace) return; @@ -2186,6 +2207,29 @@ TEST_F(TRACE_syscall, syscall_faked) EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } +TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + /* Install "kill on mknodat" filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + /* This should immediately die with SIGSYS, regardless of tracer. */ + EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0)); +} + TEST_F(TRACE_syscall, skip_after) { struct sock_filter filter[] = { @@ -4087,7 +4131,7 @@ TEST(user_notification_addfd) * lowest available fd to be assigned here. */ EXPECT_EQ(fd, nextfd++); - EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* * This sets the ID of the ADD FD to the last request plus 1. 
The diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 7f12d55b97f8..2956584e1e37 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -45,7 +45,7 @@ $(OUTPUT)/sign_key.o: sign_key.S $(CC) $(HOST_CFLAGS) -c $< -o $@ $(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S - $(CC) $(ENCL_CFLAGS) -T $^ -o $@ + $(CC) $(ENCL_CFLAGS) -T $^ -o $@ -Wl,--build-id=none EXTRA_CLEAN := \ $(OUTPUT)/test_encl.elf \ diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index f88562afcaa0..02d775789ea7 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -19,13 +19,38 @@ #include "../../../../arch/x86/include/uapi/asm/sgx.h" enum encl_op_type { - ENCL_OP_PUT, - ENCL_OP_GET, + ENCL_OP_PUT_TO_BUFFER, + ENCL_OP_GET_FROM_BUFFER, + ENCL_OP_PUT_TO_ADDRESS, + ENCL_OP_GET_FROM_ADDRESS, + ENCL_OP_NOP, + ENCL_OP_MAX, }; -struct encl_op { +struct encl_op_header { uint64_t type; - uint64_t buffer; +}; + +struct encl_op_put_to_buf { + struct encl_op_header header; + uint64_t value; +}; + +struct encl_op_get_from_buf { + struct encl_op_header header; + uint64_t value; +}; + +struct encl_op_put_to_addr { + struct encl_op_header header; + uint64_t value; + uint64_t addr; +}; + +struct encl_op_get_from_addr { + struct encl_op_header header; + uint64_t value; + uint64_t addr; }; #endif /* DEFINES_H */ diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 3ebe5d1fe337..9d4322c946e2 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -21,6 +21,8 @@ void encl_delete(struct encl *encl) { + struct encl_segment *heap_seg = &encl->segment_tbl[encl->nr_segments - 1]; + if (encl->encl_base) munmap((void *)encl->encl_base, encl->encl_size); @@ -30,6 +32,8 @@ void encl_delete(struct encl *encl) if (encl->fd) close(encl->fd); + munmap(heap_seg->src, 
heap_seg->size); + if (encl->segment_tbl) free(encl->segment_tbl); @@ -107,11 +111,14 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) memset(&secinfo, 0, sizeof(secinfo)); secinfo.flags = seg->flags; - ioc.src = (uint64_t)encl->src + seg->offset; + ioc.src = (uint64_t)seg->src; ioc.offset = seg->offset; ioc.length = seg->size; ioc.secinfo = (unsigned long)&secinfo; - ioc.flags = SGX_PAGE_MEASURE; + if (seg->measure) + ioc.flags = SGX_PAGE_MEASURE; + else + ioc.flags = 0; rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc); if (rc < 0) { @@ -122,11 +129,10 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) return true; } - - -bool encl_load(const char *path, struct encl *encl) +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size) { const char device_path[] = "/dev/sgx_enclave"; + struct encl_segment *seg; Elf64_Phdr *phdr_tbl; off_t src_offset; Elf64_Ehdr *ehdr; @@ -178,6 +184,8 @@ bool encl_load(const char *path, struct encl *encl) ehdr = encl->bin; phdr_tbl = encl->bin + ehdr->e_phoff; + encl->nr_segments = 1; /* one for the heap */ + for (i = 0; i < ehdr->e_phnum; i++) { Elf64_Phdr *phdr = &phdr_tbl[i]; @@ -193,7 +201,6 @@ bool encl_load(const char *path, struct encl *encl) for (i = 0, j = 0; i < ehdr->e_phnum; i++) { Elf64_Phdr *phdr = &phdr_tbl[i]; unsigned int flags = phdr->p_flags; - struct encl_segment *seg; if (phdr->p_type != PT_LOAD) continue; @@ -216,6 +223,7 @@ bool encl_load(const char *path, struct encl *encl) if (j == 0) { src_offset = phdr->p_offset & PAGE_MASK; + encl->src = encl->bin + src_offset; seg->prot = PROT_READ | PROT_WRITE; seg->flags = SGX_PAGE_TYPE_TCS << 8; @@ -228,15 +236,27 @@ bool encl_load(const char *path, struct encl *encl) seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset; seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK; + seg->src = encl->src + seg->offset; + seg->measure = true; j++; } - assert(j == encl->nr_segments); + 
assert(j == encl->nr_segments - 1); + + seg = &encl->segment_tbl[j]; + seg->offset = encl->segment_tbl[j - 1].offset + encl->segment_tbl[j - 1].size; + seg->size = heap_size; + seg->src = mmap(NULL, heap_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + seg->prot = PROT_READ | PROT_WRITE; + seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot; + seg->measure = false; + + if (seg->src == MAP_FAILED) + goto err; - encl->src = encl->bin + src_offset; - encl->src_size = encl->segment_tbl[j - 1].offset + - encl->segment_tbl[j - 1].size; + encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size; for (encl->encl_size = 4096; encl->encl_size < encl->src_size; ) encl->encl_size <<= 1; diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index e252015e0c15..370c4995f7c4 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2016-20 Intel Corporation. */ +#include <cpuid.h> #include <elf.h> #include <errno.h> #include <fcntl.h> @@ -21,6 +22,7 @@ #include "main.h" static const uint64_t MAGIC = 0x1122334455667788ULL; +static const uint64_t MAGIC2 = 0x8877665544332211ULL; vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave; struct vdso_symtab { @@ -107,12 +109,32 @@ static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name) return NULL; } +/* + * Return the offset in the enclave where the data segment can be found. + * The first RW segment loaded is the TCS, skip that to get info on the + * data segment. 
+ */ +static off_t encl_get_data_offset(struct encl *encl) +{ + int i; + + for (i = 1; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; + + if (seg->prot == (PROT_READ | PROT_WRITE)) + return seg->offset; + } + + return -1; +} + FIXTURE(enclave) { struct encl encl; struct sgx_enclave_run run; }; -FIXTURE_SETUP(enclave) +static bool setup_test_encl(unsigned long heap_size, struct encl *encl, + struct __test_metadata *_metadata) { Elf64_Sym *sgx_enter_enclave_sym = NULL; struct vdso_symtab symtab; @@ -122,31 +144,25 @@ FIXTURE_SETUP(enclave) unsigned int i; void *addr; - if (!encl_load("test_encl.elf", &self->encl)) { - encl_delete(&self->encl); - ksft_exit_skip("cannot load enclaves\n"); + if (!encl_load("test_encl.elf", encl, heap_size)) { + encl_delete(encl); + TH_LOG("Failed to load the test enclave.\n"); } - for (i = 0; i < self->encl.nr_segments; i++) { - seg = &self->encl.segment_tbl[i]; - - TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot); - } - - if (!encl_measure(&self->encl)) + if (!encl_measure(encl)) goto err; - if (!encl_build(&self->encl)) + if (!encl_build(encl)) goto err; /* * An enclave consumer only must do this. 
*/ - for (i = 0; i < self->encl.nr_segments; i++) { - struct encl_segment *seg = &self->encl.segment_tbl[i]; + for (i = 0; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; - addr = mmap((void *)self->encl.encl_base + seg->offset, seg->size, - seg->prot, MAP_SHARED | MAP_FIXED, self->encl.fd, 0); + addr = mmap((void *)encl->encl_base + seg->offset, seg->size, + seg->prot, MAP_SHARED | MAP_FIXED, encl->fd, 0); EXPECT_NE(addr, MAP_FAILED); if (addr == MAP_FAILED) goto err; @@ -166,8 +182,16 @@ FIXTURE_SETUP(enclave) vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value; - memset(&self->run, 0, sizeof(self->run)); - self->run.tcs = self->encl.encl_base; + return true; + +err: + encl_delete(encl); + + for (i = 0; i < encl->nr_segments; i++) { + seg = &encl->segment_tbl[i]; + + TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot); + } maps_file = fopen("/proc/self/maps", "r"); if (maps_file != NULL) { @@ -181,11 +205,13 @@ FIXTURE_SETUP(enclave) fclose(maps_file); } -err: - if (!sgx_enter_enclave_sym) - encl_delete(&self->encl); + TH_LOG("Failed to initialize the test enclave.\n"); + + return false; +} - ASSERT_NE(sgx_enter_enclave_sym, NULL); +FIXTURE_SETUP(enclave) +{ } FIXTURE_TEARDOWN(enclave) @@ -215,44 +241,130 @@ FIXTURE_TEARDOWN(enclave) TEST_F(enclave, unclobbered_vdso) { - struct encl_op op; + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; + put_op.value = MAGIC; + + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); + + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; + get_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); + + EXPECT_EQ(get_op.value, MAGIC); + 
EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); +} + +/* + * A section metric is concatenated in a way that @low bits 12-31 define the + * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the + * metric. + */ +static unsigned long sgx_calc_section_metric(unsigned int low, + unsigned int high) +{ + return (low & GENMASK_ULL(31, 12)) + + ((high & GENMASK_ULL(19, 0)) << 32); +} + +/* + * Sum total available physical SGX memory across all EPC sections + * + * Return: total available physical SGX memory available on system + */ +static unsigned long get_total_epc_mem(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned long total_size = 0; + unsigned int type; + int section = 0; + + while (true) { + __cpuid_count(SGX_CPUID, section + SGX_CPUID_EPC, eax, ebx, ecx, edx); + + type = eax & SGX_CPUID_EPC_MASK; + if (type == SGX_CPUID_EPC_INVALID) + break; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + if (type != SGX_CPUID_EPC_SECTION) + break; - EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + total_size += sgx_calc_section_metric(ecx, edx); + + section++; + } + + return total_size; +} + +TEST_F(enclave, unclobbered_vdso_oversubscribed) +{ + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; + unsigned long total_mem; + + total_mem = get_total_epc_mem(); + ASSERT_NE(total_mem, 0); + ASSERT_TRUE(setup_test_encl(total_mem, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; + put_op.value = MAGIC; + + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); - EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); 
EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); + } TEST_F(enclave, clobbered_vdso) { - struct encl_op op; + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; + put_op.value = MAGIC; + + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0); - EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); } @@ -267,27 +379,179 @@ static int test_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r TEST_F(enclave, clobbered_vdso_and_user_function) { - struct encl_op op; + struct encl_op_get_from_buf get_op; + struct encl_op_put_to_buf put_op; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; self->run.user_handler = (__u64)test_handler; self->run.user_data = 0xdeadbeef; - op.type = ENCL_OP_PUT; - op.buffer = MAGIC; + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; + put_op.value = MAGIC; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); - op.type = ENCL_OP_GET; - op.buffer = 0; + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; + get_op.value = 0; - EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0); - 
EXPECT_EQ(op.buffer, MAGIC); + EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); } +/* + * Sanity check that it is possible to enter either of the two hardcoded TCS + */ +TEST_F(enclave, tcs_entry) +{ + struct encl_op_header op; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + op.type = ENCL_OP_NOP; + + EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Move to the next TCS. */ + self->run.tcs = self->encl.encl_base + PAGE_SIZE; + + EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); +} + +/* + * Second page of .data segment is used to test changing PTE permissions. + * This spans the local encl_buffer within the test enclave. + * + * 1) Start with a sanity check: a value is written to the target page within + * the enclave and read back to ensure target page can be written to. + * 2) Change PTE permissions (RW -> RO) of target page within enclave. + * 3) Repeat (1) - this time expecting a regular #PF communicated via the + * vDSO. + * 4) Change PTE permissions of target page within enclave back to be RW. + * 5) Repeat (1) by resuming enclave, now expected to be possible to write to + * and read from target page within enclave. 
+ */ +TEST_F(enclave, pte_permissions) +{ + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + unsigned long data_start; + int ret; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + + PAGE_SIZE; + + /* + * Sanity check to ensure it is possible to write to page that will + * have its permissions manipulated. + */ + + /* Write MAGIC to page */ + put_addr_op.value = MAGIC; + put_addr_op.addr = data_start; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory that was just written to, confirming that it is the + * value previously written (MAGIC). + */ + get_addr_op.value = 0; + get_addr_op.addr = data_start; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Change PTE permissions of target page within the enclave */ + ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ); + if (ret) + perror("mprotect"); + + /* + * PTE permissions of target page changed to read-only, EPCM + * permissions unchanged (EPCM permissions are RW), attempt to + * write to the page, expecting a regular #PF. 
+ */ + + put_addr_op.value = MAGIC2; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EQ(self->run.exception_vector, 14); + EXPECT_EQ(self->run.exception_error_code, 0x7); + EXPECT_EQ(self->run.exception_addr, data_start); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* + * Change PTE permissions back to enable enclave to write to the + * target page and resume enclave - do not expect any exceptions this + * time. + */ + ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ | PROT_WRITE); + if (ret) + perror("mprotect"); + + EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0, + 0, ERESUME, 0, 0, &self->run), + 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + get_addr_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC2); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index 68672fd86cf9..b45c52ec7ab3 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -6,11 +6,15 @@ #ifndef MAIN_H #define MAIN_H +#define ENCL_HEAP_SIZE_DEFAULT 4096 + struct encl_segment { + void *src; off_t offset; size_t size; unsigned int prot; unsigned int flags; + bool measure; }; struct encl { @@ -31,7 +35,7 @@ extern unsigned char sign_key[]; extern unsigned char sign_key_end[]; void encl_delete(struct encl *ctx); -bool encl_load(const char *path, struct encl *encl); +bool encl_load(const char *path, struct encl *encl, unsigned long heap_size); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); diff --git 
a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index 92bbc5a15c39..50c5ab1aa6fa 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -289,15 +289,17 @@ static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset, static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl, struct encl_segment *seg) { - uint64_t end = seg->offset + seg->size; + uint64_t end = seg->size; uint64_t offset; - for (offset = seg->offset; offset < end; offset += PAGE_SIZE) { - if (!mrenclave_eadd(ctx, offset, seg->flags)) + for (offset = 0; offset < end; offset += PAGE_SIZE) { + if (!mrenclave_eadd(ctx, seg->offset + offset, seg->flags)) return false; - if (!mrenclave_eextend(ctx, offset, encl->src + offset)) - return false; + if (seg->measure) { + if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset)) + return false; + } } return true; diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 734ea52f9924..4fca01cfd898 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -4,6 +4,11 @@ #include <stddef.h> #include "defines.h" +/* + * Data buffer spanning two pages that will be placed first in .data + * segment. Even if not used internally the second page is needed by + * external test manipulating page permissions. 
+ */ static uint8_t encl_buffer[8192] = { 1 }; static void *memcpy(void *dest, const void *src, size_t n) @@ -16,20 +21,51 @@ static void *memcpy(void *dest, const void *src, size_t n) return dest; } -void encl_body(void *rdi, void *rsi) +static void do_encl_op_put_to_buf(void *op) +{ + struct encl_op_put_to_buf *op2 = op; + + memcpy(&encl_buffer[0], &op2->value, 8); +} + +static void do_encl_op_get_from_buf(void *op) { - struct encl_op *op = (struct encl_op *)rdi; + struct encl_op_get_from_buf *op2 = op; + + memcpy(&op2->value, &encl_buffer[0], 8); +} + +static void do_encl_op_put_to_addr(void *_op) +{ + struct encl_op_put_to_addr *op = _op; + + memcpy((void *)op->addr, &op->value, 8); +} - switch (op->type) { - case ENCL_OP_PUT: - memcpy(&encl_buffer[0], &op->buffer, 8); - break; +static void do_encl_op_get_from_addr(void *_op) +{ + struct encl_op_get_from_addr *op = _op; + + memcpy(&op->value, (void *)op->addr, 8); +} + +static void do_encl_op_nop(void *_op) +{ + +} + +void encl_body(void *rdi, void *rsi) +{ + const void (*encl_op_array[ENCL_OP_MAX])(void *) = { + do_encl_op_put_to_buf, + do_encl_op_get_from_buf, + do_encl_op_put_to_addr, + do_encl_op_get_from_addr, + do_encl_op_nop, + }; - case ENCL_OP_GET: - memcpy(&op->buffer, &encl_buffer[0], 8); - break; + struct encl_op_header *op = (struct encl_op_header *)rdi; - default: - break; - } + if (op->type < ENCL_OP_MAX) + (*encl_op_array[op->type])(op); } diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 5d5680d4ea39..82fb0dfcbd23 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -12,7 +12,7 @@ .fill 1, 8, 0 # STATE (set by CPU) .fill 1, 8, 0 # FLAGS - .quad encl_ssa # OSSA + .quad encl_ssa_tcs1 # OSSA .fill 1, 4, 0 # CSSA (set by CPU) .fill 1, 4, 1 # NSSA .quad encl_entry # OENTRY @@ -23,10 +23,10 @@ .fill 1, 4, 0xFFFFFFFF # GSLIMIT .fill 4024, 1, 0 # Reserved - # 
Identical to the previous TCS. + # TCS2 .fill 1, 8, 0 # STATE (set by CPU) .fill 1, 8, 0 # FLAGS - .quad encl_ssa # OSSA + .quad encl_ssa_tcs2 # OSSA .fill 1, 4, 0 # CSSA (set by CPU) .fill 1, 4, 1 # NSSA .quad encl_entry # OENTRY @@ -40,8 +40,9 @@ .text encl_entry: - # RBX contains the base address for TCS, which is also the first address - # inside the enclave. By adding the value of le_stack_end to it, we get + # RBX contains the base address for TCS, which is the first address + # inside the enclave for TCS #1 and one page into the enclave for + # TCS #2. By adding the value of encl_stack to it, we get # the absolute address for the stack. lea (encl_stack)(%rbx), %rax xchg %rsp, %rax @@ -81,9 +82,15 @@ encl_entry: .section ".data", "aw" -encl_ssa: +encl_ssa_tcs1: + .space 4096 +encl_ssa_tcs2: .space 4096 .balign 4096 - .space 8192 + # Stack of TCS #1 + .space 4096 encl_stack: + .balign 4096 + # Stack of TCS #2 + .space 4096 diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c index 95d93c6a88a5..84e5d9fd20b0 100644 --- a/tools/testing/selftests/sparc64/drivers/adi-test.c +++ b/tools/testing/selftests/sparc64/drivers/adi-test.c @@ -24,10 +24,6 @@ #define DEBUG_LEVEL_4_BIT (0x0008) #define DEBUG_TIMING_BIT (0x1000) -#ifndef ARRAY_SIZE -# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - /* bit mask of enabled bits to print */ #define DEBUG 0x0001 diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 8e45792703ed..b7205a069534 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -812,5 +812,29 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "7d64", + "name": "Add police action with skip_hw option", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush 
action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 100 skip_hw", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action police | grep skip_hw", + "matchPattern": "skip_hw", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json index 51799874a972..2df68017dfb8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json @@ -387,5 +387,77 @@ "$TC qdisc del dev $DUMMY ingress", "$IP link del dev $DUMMY type dummy" ] + }, + { + "id": "3329", + "name": "Validate flags of the matchall filter with skip_sw and police action with skip_hw", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions flush action police", + "$TC actions add action police rate 1mbit burst 100k index 199 skip_hw" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199", + "expExitCode": "2", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions del action police index 199" + ] + }, + { + "id": "0eeb", + "name": "Validate flags of the matchall filter with skip_hw and police action", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions flush action police", + "$TC actions add action police rate 1mbit burst 100k index 199" + 
], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_hw action police index 199", + "expExitCode": "2", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions del action police index 199" + ] + }, + { + "id": "eee4", + "name": "Validate flags of the matchall filter with skip_sw and police action", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC actions flush action police", + "$TC actions add action police rate 1mbit burst 100k index 199" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199", + "expExitCode": "2", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy", + "$TC actions del action police index 199" + ] } ] diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c index f2519154208a..1833ca97eb24 100644 --- a/tools/testing/selftests/timens/procfs.c +++ b/tools/testing/selftests/timens/procfs.c @@ -24,8 +24,6 @@ #define DAY_IN_SEC (60*60*24) #define TEN_DAYS_IN_SEC (10*DAY_IN_SEC) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - static int child_ns, parent_ns; static int switch_ns(int fd) diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c index 52b6a1185f52..387220791a05 100644 --- a/tools/testing/selftests/timens/timens.c +++ 
b/tools/testing/selftests/timens/timens.c @@ -22,8 +22,6 @@ #define DAY_IN_SEC (60*60*24) #define TEN_DAYS_IN_SEC (10*DAY_IN_SEC) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) - struct test_clock { clockid_t id; char *name; diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 4da09dbf83ba..54da4b088f4c 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -79,7 +79,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index 022d3ffe3fbf..e6756d9c60a7 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -72,7 +72,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh index fe8fcfb334e0..a5cb4b09a46c 100644 --- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then reservation_usage_file=rsvd.current fi -cgroup_path=/dev/cgroup/memory -if [[ ! 
-e $cgroup_path ]]; then - mkdir -p $cgroup_path - if [[ $cgroup2 ]]; then +if [[ $cgroup2 ]]; then + cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path - else + do_umount=1 + fi + echo "+hugetlb" >$cgroup_path/cgroup.subtree_control +else + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + if [[ -z "$cgroup_path" ]]; then + cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path + do_umount=1 fi fi - -if [[ $cgroup2 ]]; then - echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control -fi +export cgroup_path function cleanup() { if [[ $cgroup2 ]]; then @@ -108,7 +112,7 @@ function setup_cgroup() { function wait_for_hugetlb_memory_to_get_depleted() { local cgroup="$1" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" # Wait for hugetlbfs memory to get depleted. while [ $(cat $path) != 0 ]; do echo Waiting for hugetlb memory to get depleted. @@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() { local cgroup="$1" local size="$2" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" # Wait for hugetlbfs memory to get written. while [ $(cat $path) != $size ]; do echo Waiting for hugetlb memory reservation to reach size $size. @@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() { local cgroup="$1" local size="$2" - local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file" + local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file" # Wait for hugetlbfs memory to get written. while [ $(cat $path) != $size ]; do echo Waiting for hugetlb memory to reach size $size. 
@@ -574,5 +578,7 @@ for populate in "" "-o"; do done # populate done # method -umount $cgroup_path -rmdir $cgroup_path +if [[ $do_umount ]]; then + umount $cgroup_path + rmdir $cgroup_path +fi diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 864f126ffd78..203323967b50 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown) /* * Test memory snapshot without faulting in pages accessed by the device. */ +TEST_F(hmm, mixedmap) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned char *m; + int ret; + + npages = 1; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + self->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ); + + hmm_buffer_free(buffer); +} + +/* + * Test memory snapshot without faulting in pages accessed by the device. + */ TEST_F(hmm2, snapshot) { struct hmm_buffer *buffer; diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index 257df94697a5..2a7c33631a29 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -4,7 +4,11 @@ * * Example of remapping huge page memory in a user application using the * mremap system call. Code assumes a hugetlbfs filesystem is mounted - * at './huge'. 
The code will use 10MB worth of huge pages. + * at './huge'. The amount of memory used by this test is decided by a command + * line argument in MBs. If missing, the default amount is 10MB. + * + * To make sure the test triggers pmd sharing and goes through the 'unshare' + * path in the mremap code use 1GB (1024) or more. */ #define _GNU_SOURCE @@ -18,8 +22,10 @@ #include <linux/userfaultfd.h> #include <sys/ioctl.h> -#define LENGTH (1UL * 1024 * 1024 * 1024) +#define DEFAULT_LENGTH_MB 10UL +#define MB_TO_BYTES(x) (x * 1024 * 1024) +#define FILE_NAME "huge/hugepagefile" #define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC) #define FLAGS (MAP_SHARED | MAP_ANONYMOUS) @@ -28,20 +34,20 @@ static void check_bytes(char *addr) printf("First hex is %x\n", *((unsigned int *)addr)); } -static void write_bytes(char *addr) +static void write_bytes(char *addr, size_t len) { unsigned long i; - for (i = 0; i < LENGTH; i++) + for (i = 0; i < len; i++) *(addr + i) = (char)i; } -static int read_bytes(char *addr) +static int read_bytes(char *addr, size_t len) { unsigned long i; check_bytes(addr); - for (i = 0; i < LENGTH; i++) + for (i = 0; i < len; i++) if (*(addr + i) != (char)i) { printf("Mismatch at %lu\n", i); return 1; @@ -99,11 +105,19 @@ static void register_region_with_uffd(char *addr, size_t len) } } -int main(void) +int main(int argc, char *argv[]) { + /* Read memory length as the first arg if valid, otherwise fallback to + * the default length. Any additional args are ignored. + */ + size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL; + + length = length > 0 ? length : DEFAULT_LENGTH_MB; + length = MB_TO_BYTES(length); + int ret = 0; - int fd = open("/huge/test", O_CREAT | O_RDWR, 0755); + int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755); if (fd < 0) { perror("Open failed"); @@ -112,7 +126,7 @@ int main(void) /* mmap to a PUD aligned address to hopefully trigger pmd sharing. 
*/ unsigned long suggested_addr = 0x7eaa40000000; - void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + void *haddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); printf("Map haddr: Returned address is %p\n", haddr); if (haddr == MAP_FAILED) { @@ -122,7 +136,7 @@ int main(void) /* mmap again to a dummy address to hopefully trigger pmd sharing. */ suggested_addr = 0x7daa40000000; - void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION, + void *daddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); printf("Map daddr: Returned address is %p\n", daddr); if (daddr == MAP_FAILED) { @@ -132,16 +146,16 @@ int main(void) suggested_addr = 0x7faa40000000; void *vaddr = - mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0); + mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0); printf("Map vaddr: Returned address is %p\n", vaddr); if (vaddr == MAP_FAILED) { perror("mmap2"); exit(1); } - register_region_with_uffd(haddr, LENGTH); + register_region_with_uffd(haddr, length); - void *addr = mremap(haddr, LENGTH, LENGTH, + void *addr = mremap(haddr, length, length, MREMAP_MAYMOVE | MREMAP_FIXED, vaddr); if (addr == MAP_FAILED) { perror("mremap"); @@ -150,10 +164,10 @@ int main(void) printf("Mremap: Returned address is %p\n", addr); check_bytes(addr); - write_bytes(addr); - ret = read_bytes(addr); + write_bytes(addr, length); + ret = read_bytes(addr, length); - munmap(addr, LENGTH); + munmap(addr, length); return ret; } diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh index 4a9a3afe9fd4..bf2d2a684edf 100644 --- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh @@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then usage_file=current fi -CGROUP_ROOT='/dev/cgroup/memory' -MNT='/mnt/huge/' -if [[ ! 
-e $CGROUP_ROOT ]]; then - mkdir -p $CGROUP_ROOT - if [[ $cgroup2 ]]; then +if [[ $cgroup2 ]]; then + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT - sleep 1 - echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control - else + do_umount=1 + fi + echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control +else + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + if [[ -z "$CGROUP_ROOT" ]]; then + CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT + do_umount=1 fi fi +MNT='/mnt/huge/' function get_machine_hugepage_size() { hpz=$(grep -i hugepagesize /proc/meminfo) diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c index 0624d1bd71b5..7c0b0617b9f8 100644 --- a/tools/testing/selftests/vm/mremap_test.c +++ b/tools/testing/selftests/vm/mremap_test.c @@ -20,7 +20,6 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define MIN(X, Y) ((X) < (Y) ? 
(X) : (Y)) struct config { diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 622a85848f61..92f3be3dd8e5 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -13,6 +13,8 @@ #include <ucontext.h> #include <sys/mman.h> +#include "../kselftest.h" + /* Define some kernel-like types */ #define u8 __u8 #define u16 __u16 @@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); } -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) \ diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index a24d30af3094..75d401741394 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -111,7 +111,7 @@ fi echo "-----------------------" echo "running hugepage-mremap" echo "-----------------------" -./hugepage-mremap +./hugepage-mremap 256 if [ $? 
-ne 0 ]; then echo "[FAIL]" exitcode=1 diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 9354a5e0321c..d3fd24f9fae8 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -87,7 +87,7 @@ static bool test_uffdio_minor = false; static bool map_shared; static int shm_fd; -static int huge_fd = -1; /* only used for hugetlb_shared test */ +static int huge_fd; static char *huge_fd_off0; static unsigned long long *count_verify; static int uffd = -1; @@ -223,9 +223,6 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) static void hugetlb_release_pages(char *rel_area) { - if (huge_fd == -1) - return; - if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, rel_area == huge_fd_off0 ? 0 : nr_pages * page_size, nr_pages * page_size)) @@ -238,17 +235,17 @@ static void hugetlb_allocate_area(void **alloc_area) char **alloc_area_alias; *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - map_shared ? MAP_SHARED : - MAP_PRIVATE | MAP_HUGETLB | + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + MAP_HUGETLB | (*alloc_area == area_src ? 0 : MAP_NORESERVE), - huge_fd, - *alloc_area == area_src ? 0 : nr_pages * page_size); + huge_fd, *alloc_area == area_src ? 0 : + nr_pages * page_size); if (*alloc_area == MAP_FAILED) err("mmap of hugetlbfs file failed"); if (map_shared) { area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_SHARED, + MAP_SHARED | MAP_HUGETLB, huge_fd, *alloc_area == area_src ? 
0 : nr_pages * page_size); if (area_alias == MAP_FAILED) @@ -648,7 +645,7 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg) if (ret != sizeof(*msg)) { if (ret < 0) { - if (errno == EAGAIN) + if (errno == EAGAIN || errno == EINTR) return 1; err("blocking read error"); } else { @@ -724,8 +721,11 @@ static void *uffd_poll_thread(void *arg) for (;;) { ret = poll(pollfd, 2, -1); - if (ret <= 0) + if (ret <= 0) { + if (errno == EINTR || errno == EAGAIN) + continue; err("poll error: %d", ret); + } if (pollfd[1].revents & POLLIN) { if (read(pollfd[1].fd, &tmp_chr, 1) != 1) err("read pipefd error"); @@ -1417,7 +1417,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize) static int userfaultfd_stress(void) { void *area; - char *tmp_area; unsigned long nr; struct uffdio_register uffdio_register; struct uffd_stats uffd_stats[nr_cpus]; @@ -1528,13 +1527,9 @@ static int userfaultfd_stress(void) count_verify[nr], nr); /* prepare next bounce */ - tmp_area = area_src; - area_src = area_dst; - area_dst = tmp_area; + swap(area_src, area_dst); - tmp_area = area_src_alias; - area_src_alias = area_dst_alias; - area_dst_alias = tmp_area; + swap(area_src_alias, area_dst_alias); uffd_stats_report(uffd_stats, nr_cpus); } diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c index 83acdff26a13..da6ec3b53ea8 100644 --- a/tools/testing/selftests/vm/va_128TBswitch.c +++ b/tools/testing/selftests/vm/va_128TBswitch.c @@ -9,7 +9,7 @@ #include <sys/mman.h> #include <string.h> -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#include "../kselftest.h" #ifdef __powerpc64__ #define PAGE_SIZE (64 << 10) diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh index d3d0d108924d..70a02301f4c2 100644 --- a/tools/testing/selftests/vm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -14,7 +14,7 @@ want_sleep=$8 reserve=$9 echo 
"Putting task in cgroup '$cgroup'" -echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs +echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs echo "Method is $method" diff --git a/tools/thermal/tmon/pid.c b/tools/thermal/tmon/pid.c index c54edb4f630c..296f69c00c57 100644 --- a/tools/thermal/tmon/pid.c +++ b/tools/thermal/tmon/pid.c @@ -54,7 +54,6 @@ static double xk_1, xk_2; /* input temperature x[k-#] */ */ int init_thermal_controller(void) { - int ret = 0; /* init pid params */ p_param.ts = ticktime; @@ -65,7 +64,7 @@ int init_thermal_controller(void) p_param.t_target = target_temp_user; - return ret; + return 0; } void controller_reset(void) diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile new file mode 100644 index 000000000000..2d52ff0bff7d --- /dev/null +++ b/tools/tracing/rtla/Makefile @@ -0,0 +1,102 @@ +NAME := rtla +VERSION := 0.5 + +# From libtracefs: +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. 
+$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,STRIP,$(CROSS_COMPILE)strip) +$(call allow-override,PKG_CONFIG,pkg-config) +$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/) +$(call allow-override,LDCONFIG,ldconfig) + +INSTALL = install +FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \ + -fasynchronous-unwind-tables -fstack-clash-protection +WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized + +TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs) + +CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS) +LDFLAGS := -ggdb +LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps + +SRC := $(wildcard src/*.c) +HDR := $(wildcard src/*.h) +OBJ := $(SRC:.c=.o) +DIRS := src +FILES := Makefile README.txt +CEXT := bz2 +TARBALL := $(NAME)-$(VERSION).tar.$(CEXT) +TAROPTS := -cvjf $(TARBALL) +BINDIR := /usr/bin +DATADIR := /usr/share +DOCDIR := $(DATADIR)/doc +MANDIR := $(DATADIR)/man +LICDIR := $(DATADIR)/licenses +SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) + +# If running from the tarball, man pages are stored in the Documentation +# dir. If running from the kernel source, man pages are stored in +# Documentation/tools/rtla/. +ifneq ($(wildcard Documentation/.*),) +DOCSRC = Documentation/ +else +DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/ +endif + +.PHONY: all +all: rtla + +rtla: $(OBJ) doc + $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) + +static: $(OBJ) + $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl + +.PHONY: install +install: doc_install + $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR) + $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(STRIP) $(DESTDIR)$(BINDIR)/rtla + @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise + @test ! 
-f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat + ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat + +.PHONY: clean tarball +clean: doc_clean + @test ! -f rtla || rm rtla + @test ! -f rtla-static || rm rtla-static + @test ! -f src/rtla.o || rm src/rtla.o + @test ! -f $(TARBALL) || rm -f $(TARBALL) + @rm -rf *~ $(OBJ) *.tar.$(CEXT) + +tarball: clean + rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) + mkdir $(NAME)-$(VERSION)/Documentation/ + cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/ + tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION) + rm -rf $(NAME)-$(VERSION) + +.PHONY: doc doc_clean doc_install +doc: + $(MAKE) -C $(DOCSRC) + +doc_clean: + $(MAKE) -C $(DOCSRC) clean + +doc_install: + $(MAKE) -C $(DOCSRC) install diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt new file mode 100644 index 000000000000..6c88446f7e74 --- /dev/null +++ b/tools/tracing/rtla/README.txt @@ -0,0 +1,36 @@ +RTLA: Real-Time Linux Analysis tools + +The rtla is a meta-tool that includes a set of commands that +aims to analyze the real-time properties of Linux. But, instead of +testing Linux as a black box, rtla leverages kernel tracing +capabilities to provide precise information about the properties +and root causes of unexpected results. + +Installing RTLA + +RTLA depends on some libraries and tools. More precisely, it depends on the +following libraries: + + - libtracefs + - libtraceevent + - procps + +It also depends on python3-docutils to compile man pages. + +For development, we suggest the following steps for compiling rtla: + + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git + $ cd libtraceevent/ + $ make + $ sudo make install + $ cd .. + $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git + $ cd libtracefs/ + $ make + $ sudo make install + $ cd .. 
+ $ cd $rtla_src + $ make + $ sudo make install + +For further information, please refer to the rtla man page. diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c new file mode 100644 index 000000000000..7b73d1eccd0e --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.c @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise_get_cpus - return the original "osnoise/cpus" content + * + * It also saves the value to be restored. + */ +char *osnoise_get_cpus(struct osnoise_context *context) +{ + if (context->curr_cpus) + return context->curr_cpus; + + if (context->orig_cpus) + return context->orig_cpus; + + context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL); + + /* + * The error value (NULL) is the same for tracefs_instance_file_read() + * and this functions, so: + */ + return context->orig_cpus; +} + +/* + * osnoise_set_cpus - configure osnoise to run on *cpus + * + * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat + * will run. This function opens this file, saves the current value, + * and set the cpus passed as argument. 
+ */ +int osnoise_set_cpus(struct osnoise_context *context, char *cpus) +{ + char *orig_cpus = osnoise_get_cpus(context); + char buffer[1024]; + int retval; + + if (!orig_cpus) + return -1; + + context->curr_cpus = strdup(cpus); + if (!context->curr_cpus) + return -1; + + snprintf(buffer, 1024, "%s\n", cpus); + + debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer); + if (retval < 0) { + free(context->curr_cpus); + context->curr_cpus = NULL; + return -1; + } + + return 0; +} + +/* + * osnoise_restore_cpus - restore the original "osnoise/cpus" + * + * osnoise_set_cpus() saves the original data for the "osnoise/cpus" + * file. This function restore the original config it was previously + * modified. + */ +void osnoise_restore_cpus(struct osnoise_context *context) +{ + int retval; + + if (!context->orig_cpus) + return; + + if (!context->curr_cpus) + return; + + /* nothing to do? */ + if (!strcmp(context->orig_cpus, context->curr_cpus)) + goto out_done; + + debug_msg("restoring cpus to %s", context->orig_cpus); + + retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus); + if (retval < 0) + err_msg("could not restore original osnoise cpus\n"); + +out_done: + free(context->curr_cpus); + context->curr_cpus = NULL; +} + +/* + * osnoise_put_cpus - restore cpus config and cleanup data + */ +void osnoise_put_cpus(struct osnoise_context *context) +{ + osnoise_restore_cpus(context); + + if (!context->orig_cpus) + return; + + free(context->orig_cpus); + context->orig_cpus = NULL; +} + +/* + * osnoise_read_ll_config - read a long long value from a config + * + * returns -1 on error. 
+ */ +static long long osnoise_read_ll_config(char *rel_path) +{ + long long retval; + char *buffer; + + buffer = tracefs_instance_file_read(NULL, rel_path, NULL); + if (!buffer) + return -1; + + /* get_llong_from_str returns -1 on error */ + retval = get_llong_from_str(buffer); + + debug_msg("reading %s returned %lld\n", rel_path, retval); + + free(buffer); + + return retval; +} + +/* + * osnoise_write_ll_config - write a long long value to a config in rel_path + * + * returns -1 on error. + */ +static long long osnoise_write_ll_config(char *rel_path, long long value) +{ + char buffer[BUFF_U64_STR_SIZE]; + long long retval; + + snprintf(buffer, sizeof(buffer), "%lld\n", value); + + debug_msg("setting %s to %lld\n", rel_path, value); + + retval = tracefs_instance_file_write(NULL, rel_path, buffer); + return retval; +} + +/* + * osnoise_get_runtime - return the original "osnoise/runtime_us" value + * + * It also saves the value to be restored. + */ +unsigned long long osnoise_get_runtime(struct osnoise_context *context) +{ + long long runtime_us; + + if (context->runtime_us != OSNOISE_TIME_INIT_VAL) + return context->runtime_us; + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + return context->orig_runtime_us; + + runtime_us = osnoise_read_ll_config("osnoise/runtime_us"); + if (runtime_us < 0) + goto out_err; + + context->orig_runtime_us = runtime_us; + return runtime_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_period - return the original "osnoise/period_us" value + * + * It also saves the value to be restored. 
+ */ +unsigned long long osnoise_get_period(struct osnoise_context *context) +{ + long long period_us; + + if (context->period_us != OSNOISE_TIME_INIT_VAL) + return context->period_us; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_period_us; + + period_us = osnoise_read_ll_config("osnoise/period_us"); + if (period_us < 0) + goto out_err; + + context->orig_period_us = period_us; + return period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +static int __osnoise_write_runtime(struct osnoise_context *context, + unsigned long long runtime) +{ + int retval; + + if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/runtime_us", runtime); + if (retval < 0) + return -1; + + context->runtime_us = runtime; + return 0; +} + +static int __osnoise_write_period(struct osnoise_context *context, + unsigned long long period) +{ + int retval; + + if (context->orig_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/period_us", period); + if (retval < 0) + return -1; + + context->period_us = period; + return 0; +} + +/* + * osnoise_set_runtime_period - set osnoise runtime and period + * + * Osnoise's runtime and period are related as runtime <= period. + * Thus, this function saves the original values, and then tries + * to set the runtime and period if they are != 0. + */ +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period) +{ + unsigned long long curr_runtime_us; + unsigned long long curr_period_us; + int retval; + + if (!period && !runtime) + return 0; + + curr_runtime_us = osnoise_get_runtime(context); + curr_period_us = osnoise_get_period(context); + + /* error getting any value? 
*/ + if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL) + return -1; + + if (!period) { + if (runtime > curr_period_us) + return -1; + return __osnoise_write_runtime(context, runtime); + } else if (!runtime) { + if (period < curr_runtime_us) + return -1; + return __osnoise_write_period(context, period); + } + + if (runtime > curr_period_us) { + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + } else { + retval = __osnoise_write_runtime(context, runtime); + if (retval) + return -1; + retval = __osnoise_write_period(context, period); + if (retval) + return -1; + } + + return 0; +} + +/* + * osnoise_restore_runtime_period - restore the original runtime and period + */ +void osnoise_restore_runtime_period(struct osnoise_context *context) +{ + unsigned long long orig_runtime = context->orig_runtime_us; + unsigned long long orig_period = context->orig_period_us; + unsigned long long curr_runtime = context->runtime_us; + unsigned long long curr_period = context->period_us; + int retval; + + if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL)) + return; + + if ((orig_period == curr_period) && (orig_runtime == curr_runtime)) + goto out_done; + + retval = osnoise_set_runtime_period(context, orig_runtime, orig_period); + if (retval) + err_msg("Could not restore original osnoise runtime/period\n"); + +out_done: + context->runtime_us = OSNOISE_TIME_INIT_VAL; + context->period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_runtime_period - restore original values and cleanup data + */ +void osnoise_put_runtime_period(struct osnoise_context *context) +{ + osnoise_restore_runtime_period(context); + + if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL) + context->orig_runtime_us = OSNOISE_TIME_INIT_VAL; + + if (context->orig_period_us != OSNOISE_TIME_INIT_VAL) + context->orig_period_us = 
OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us" + */ +static long long +osnoise_get_timerlat_period_us(struct osnoise_context *context) +{ + long long timerlat_period_us; + + if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->timerlat_period_us; + + if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL) + return context->orig_timerlat_period_us; + + timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us"); + if (timerlat_period_us < 0) + goto out_err; + + context->orig_timerlat_period_us = timerlat_period_us; + return timerlat_period_us; + +out_err: + return OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_set_timerlat_period_us - set "timerlat_period_us" + */ +int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us) +{ + long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context); + int retval; + + if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us); + if (retval < 0) + return -1; + + context->timerlat_period_us = timerlat_period_us; + + return 0; +} + +/* + * osnoise_restore_timerlat_period_us - restore "timerlat_period_us" + */ +void osnoise_restore_timerlat_period_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + if (context->orig_timerlat_period_us == context->timerlat_period_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us); + if (retval < 0) + err_msg("Could not restore original osnoise timerlat_period_us\n"); + +out_done: + context->timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_put_timerlat_period_us - restore original values and cleanup data + */ +void osnoise_put_timerlat_period_us(struct osnoise_context *context) 
+{ + osnoise_restore_timerlat_period_us(context); + + if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL) + return; + + context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL; +} + +/* + * osnoise_get_stop_us - read and save the original "stop_tracing_us" + */ +static long long +osnoise_get_stop_us(struct osnoise_context *context) +{ + long long stop_us; + + if (context->stop_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_us; + + if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_us; + + stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us"); + if (stop_us < 0) + goto out_err; + + context->orig_stop_us = stop_us; + return stop_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_us - set "stop_tracing_us" + */ +int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us) +{ + long long curr_stop_us = osnoise_get_stop_us(context); + int retval; + + if (curr_stop_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us); + if (retval < 0) + return -1; + + context->stop_us = stop_us; + + return 0; +} + +/* + * osnoise_restore_stop_us - restore the original "stop_tracing_us" + */ +void osnoise_restore_stop_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_us == context->stop_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_us\n"); + +out_done: + context->stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_us - restore original values and cleanup data + */ +void osnoise_put_stop_us(struct osnoise_context *context) +{ + osnoise_restore_stop_us(context); + + if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; +} + +/* 
+ * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us" + */ +static long long +osnoise_get_stop_total_us(struct osnoise_context *context) +{ + long long stop_total_us; + + if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->stop_total_us; + + if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL) + return context->orig_stop_total_us; + + stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us"); + if (stop_total_us < 0) + goto out_err; + + context->orig_stop_total_us = stop_total_us; + return stop_total_us; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_stop_total_us - set "stop_tracing_total_us" + */ +int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us) +{ + long long curr_stop_total_us = osnoise_get_stop_total_us(context); + int retval; + + if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us); + if (retval < 0) + return -1; + + context->stop_total_us = stop_total_us; + + return 0; +} + +/* + * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us" + */ +void osnoise_restore_stop_total_us(struct osnoise_context *context) +{ + int retval; + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_stop_total_us == context->stop_total_us) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", + context->orig_stop_total_us); + if (retval < 0) + err_msg("Could not restore original osnoise stop_total_us\n"); + +out_done: + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_stop_total_us - restore original values and cleanup data + */ +void osnoise_put_stop_total_us(struct osnoise_context *context) +{ + osnoise_restore_stop_total_us(context); + + if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL) + return; + + 
context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_get_print_stack - read and save the original "print_stack" + */ +static long long +osnoise_get_print_stack(struct osnoise_context *context) +{ + long long print_stack; + + if (context->print_stack != OSNOISE_OPTION_INIT_VAL) + return context->print_stack; + + if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL) + return context->orig_print_stack; + + print_stack = osnoise_read_ll_config("osnoise/print_stack"); + if (print_stack < 0) + goto out_err; + + context->orig_print_stack = print_stack; + return print_stack; + +out_err: + return OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_set_print_stack - set "print_stack" + */ +int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack) +{ + long long curr_print_stack = osnoise_get_print_stack(context); + int retval; + + if (curr_print_stack == OSNOISE_OPTION_INIT_VAL) + return -1; + + retval = osnoise_write_ll_config("osnoise/print_stack", print_stack); + if (retval < 0) + return -1; + + context->print_stack = print_stack; + + return 0; +} + +/* + * osnoise_restore_print_stack - restore the original "print_stack" + */ +void osnoise_restore_print_stack(struct osnoise_context *context) +{ + int retval; + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + if (context->orig_print_stack == context->print_stack) + goto out_done; + + retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack); + if (retval < 0) + err_msg("Could not restore original osnoise print_stack\n"); + +out_done: + context->print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * osnoise_put_print_stack - restore original values and cleanup data + */ +void osnoise_put_print_stack(struct osnoise_context *context) +{ + osnoise_restore_print_stack(context); + + if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL) + return; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; +} + +/* + * enable_osnoise - 
enable osnoise tracer in the trace_instance + */ +int enable_osnoise(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "osnoise"); +} + +/* + * enable_timerlat - enable timerlat tracer in the trace_instance + */ +int enable_timerlat(struct trace_instance *trace) +{ + return enable_tracer_by_name(trace->inst, "timerlat"); +} + +enum { + FLAG_CONTEXT_NEWLY_CREATED = (1 << 0), + FLAG_CONTEXT_DELETED = (1 << 1), +}; + +/* + * osnoise_get_context - increase the usage of a context and return it + */ +int osnoise_get_context(struct osnoise_context *context) +{ + int ret; + + if (context->flags & FLAG_CONTEXT_DELETED) { + ret = -1; + } else { + context->ref++; + ret = 0; + } + + return ret; +} + +/* + * osnoise_context_alloc - alloc an osnoise_context + * + * The osnoise context contains the information of the "osnoise/" configs. + * It is used to set and restore the config. + */ +struct osnoise_context *osnoise_context_alloc(void) +{ + struct osnoise_context *context; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + context->orig_stop_us = OSNOISE_OPTION_INIT_VAL; + context->stop_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL; + context->stop_total_us = OSNOISE_OPTION_INIT_VAL; + + context->orig_print_stack = OSNOISE_OPTION_INIT_VAL; + context->print_stack = OSNOISE_OPTION_INIT_VAL; + + osnoise_get_context(context); + + return context; +} + +/* + * osnoise_put_context - put the osnoise_put_context + * + * If there is no other user for the context, the original data + * is restored. 
+ */ +void osnoise_put_context(struct osnoise_context *context) +{ + if (--context->ref < 1) + context->flags |= FLAG_CONTEXT_DELETED; + + if (!(context->flags & FLAG_CONTEXT_DELETED)) + return; + + osnoise_put_cpus(context); + osnoise_put_runtime_period(context); + osnoise_put_stop_us(context); + osnoise_put_stop_total_us(context); + osnoise_put_timerlat_period_us(context); + osnoise_put_print_stack(context); + + free(context); +} + +/* + * osnoise_destroy_tool - disable trace, restore configs and free data + */ +void osnoise_destroy_tool(struct osnoise_tool *top) +{ + trace_instance_destroy(&top->trace); + + if (top->context) + osnoise_put_context(top->context); + + free(top); +} + +/* + * osnoise_init_tool - init an osnoise tool + * + * It allocs data, create a context to store data and + * creates a new trace instance for the tool. + */ +struct osnoise_tool *osnoise_init_tool(char *tool_name) +{ + struct osnoise_tool *top; + int retval; + + top = calloc(1, sizeof(*top)); + if (!top) + return NULL; + + top->context = osnoise_context_alloc(); + if (!top->context) + goto out_err; + + retval = trace_instance_init(&top->trace, tool_name); + if (retval) + goto out_err; + + return top; +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +/* + * osnoise_init_trace_tool - init a tracer instance to trace osnoise events + */ +struct osnoise_tool *osnoise_init_trace_tool(char *tracer) +{ + struct osnoise_tool *trace; + int retval; + + trace = osnoise_init_tool("osnoise_trace"); + if (!trace) + return NULL; + + retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL); + if (retval < 0 && !errno) { + err_msg("Could not find osnoise events\n"); + goto out_err; + } + + retval = enable_tracer_by_name(trace->trace.inst, tracer); + if (retval) { + err_msg("Could not enable osnoiser tracer for tracing\n"); + goto out_err; + } + + return trace; +out_err: + osnoise_destroy_tool(trace); + return NULL; +} + +static void osnoise_usage(void) +{ + int i; + + static const 
char *msg[] = { + "", + "osnoise version " VERSION, + "", + " usage: [rtla] osnoise [MODE] ...", + "", + " modes:", + " top - prints the summary from osnoise tracer", + " hist - prints a histogram of osnoise samples", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int osnoise_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if osnoise was called without any argument, run the + * default cmdline. + */ + if (argc == 1) { + osnoise_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + osnoise_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + osnoise_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + osnoise_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + osnoise_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + osnoise_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h new file mode 100644 index 000000000000..9e4b2e2a4559 --- /dev/null +++ b/tools/tracing/rtla/src/osnoise.h @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "trace.h" + +/* + * osnoise_context - read, store, write, restore osnoise configs. 
+ */ +struct osnoise_context { + int flags; + int ref; + + char *curr_cpus; + char *orig_cpus; + + /* 0 as init value */ + unsigned long long orig_runtime_us; + unsigned long long runtime_us; + + /* 0 as init value */ + unsigned long long orig_period_us; + unsigned long long period_us; + + /* 0 as init value */ + long long orig_timerlat_period_us; + long long timerlat_period_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_us; + long long stop_us; + + /* -1 as init value because 0 is disabled */ + long long orig_stop_total_us; + long long stop_total_us; + + /* -1 as init value because 0 is disabled */ + long long orig_print_stack; + long long print_stack; +}; + +/* + * *_INIT_VALs are also invalid values, they are used to + * communicate errors. + */ +#define OSNOISE_OPTION_INIT_VAL (-1) +#define OSNOISE_TIME_INIT_VAL (0) + +struct osnoise_context *osnoise_context_alloc(void); +int osnoise_get_context(struct osnoise_context *context); +void osnoise_put_context(struct osnoise_context *context); + +int osnoise_set_cpus(struct osnoise_context *context, char *cpus); +void osnoise_restore_cpus(struct osnoise_context *context); + +int osnoise_set_runtime_period(struct osnoise_context *context, + unsigned long long runtime, + unsigned long long period); +void osnoise_restore_runtime_period(struct osnoise_context *context); + +int osnoise_set_stop_us(struct osnoise_context *context, + long long stop_us); +void osnoise_restore_stop_us(struct osnoise_context *context); + +int osnoise_set_stop_total_us(struct osnoise_context *context, + long long stop_total_us); +void osnoise_restore_stop_total_us(struct osnoise_context *context); + +int osnoise_set_timerlat_period_us(struct osnoise_context *context, + long long timerlat_period_us); +void osnoise_restore_timerlat_period_us(struct osnoise_context *context); + +void osnoise_restore_print_stack(struct osnoise_context *context); +int osnoise_set_print_stack(struct osnoise_context *context, + long long 
print_stack); + +/* + * osnoise_tool - osnoise based tool definition. + */ +struct osnoise_tool { + struct trace_instance trace; + struct osnoise_context *context; + void *data; + void *params; + time_t start_time; +}; + +void osnoise_destroy_tool(struct osnoise_tool *top); +struct osnoise_tool *osnoise_init_tool(char *tool_name); +struct osnoise_tool *osnoise_init_trace_tool(char *tracer); + +int osnoise_hist_main(int argc, char *argv[]); +int osnoise_top_main(int argc, char **argv); +int osnoise_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c new file mode 100644 index 000000000000..180fcbe423cd --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -0,0 +1,801 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" + +struct osnoise_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int set_sched; + int output_divisor; + struct sched_attr sched_param; + + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + +struct osnoise_hist_cpu { + int *samples; + int count; + + unsigned long long min_sample; + unsigned long long sum_sample; + unsigned long long max_sample; + +}; + +struct osnoise_hist_data { + struct tracefs_hist *trace_hist; + struct osnoise_hist_cpu *hist; + int entries; + int bucket_size; + int nr_cpus; +}; + +/* + * osnoise_free_histogram - free runtime data + */ +static void +osnoise_free_histogram(struct osnoise_hist_data *data) +{ + int cpu; + + /* one histogram for IRQ 
and one for thread, per CPU */ + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (data->hist[cpu].samples) + free(data->hist[cpu].samples); + } + + /* one set of histograms per CPU */ + if (data->hist) + free(data->hist); + + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_hist_data +*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size) +{ + struct osnoise_hist_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->entries = entries; + data->bucket_size = bucket_size; + data->nr_cpus = nr_cpus; + + data->hist = calloc(1, sizeof(*data->hist) * nr_cpus); + if (!data->hist) + goto cleanup; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1)); + if (!data->hist[cpu].samples) + goto cleanup; + } + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) + data->hist[cpu].min_sample = ~0; + + return data; + +cleanup: + osnoise_free_histogram(data); + return NULL; +} + +static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu, + unsigned long long duration, int count) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + duration = duration / params->output_divisor; + + if (data->bucket_size) + bucket = duration / data->bucket_size; + + hist = data->hist[cpu].samples; + data->hist[cpu].count += count; + update_min(&data->hist[cpu].min_sample, &duration); + update_sum(&data->hist[cpu].sum_sample, &duration); + update_max(&data->hist[cpu].max_sample, &duration); + + if (bucket < entries) + hist[bucket] += count; + else + hist[entries] += count; +} + +/* + * osnoise_destroy_trace_hist - disable events used to collect histogram + */ +static void osnoise_destroy_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data 
= tool->data; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + tracefs_hist_destroy(tool->trace.inst, data->trace_hist); +} + +/* + * osnoise_init_trace_hist - enable events used to collect histogram + */ +static int osnoise_init_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + int bucket_size; + char buff[128]; + int retval = 0; + + /* + * Set the size of the bucket. + */ + bucket_size = params->output_divisor * params->bucket_size; + snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size); + + data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold", + buff, TRACEFS_HIST_KEY_NORMAL); + if (!data->trace_hist) + return 1; + + retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0); + if (retval) + goto out_err; + + retval = tracefs_hist_start(tool->trace.inst, data->trace_hist); + if (retval) + goto out_err; + + return 0; + +out_err: + osnoise_destroy_trace_hist(tool); + return 1; +} + +/* + * osnoise_read_trace_hist - parse histogram file and file osnoise histogram + */ +static void osnoise_read_trace_hist(struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + long long cpu, counter, duration; + char *content, *position; + + tracefs_hist_pause(tool->trace.inst, data->trace_hist); + + content = tracefs_event_file_read(tool->trace.inst, "osnoise", + "sample_threshold", + "hist", NULL); + if (!content) + return; + + position = content; + while (true) { + position = strstr(position, "duration: ~"); + if (!position) + break; + position += strlen("duration: ~"); + duration = get_llong_from_str(position); + if (duration == -1) + err_msg("error reading duration from histogram\n"); + + position = strstr(position, "cpu:"); + if (!position) + break; + position += strlen("cpu: "); + cpu = get_llong_from_str(position); + if (cpu == -1) + err_msg("error reading cpu from histogram\n"); + + position = 
strstr(position, "hitcount:"); + if (!position) + break; + position += strlen("hitcount: "); + counter = get_llong_from_str(position); + if (counter == -1) + err_msg("error reading counter from histogram\n"); + + osnoise_hist_update_multiple(tool, cpu, duration, counter); + } + free(content); +} + +/* + * osnoise_hist_header - print the header of the tracer to the output + */ +static void osnoise_hist_header(struct osnoise_tool *tool) +{ + struct osnoise_hist_params *params = tool->params; + struct osnoise_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA osnoise histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(s, " CPU-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * osnoise_print_summary - print the summary of the hist data to the output + */ +static void +osnoise_print_summary(struct osnoise_hist_params *params, + struct trace_instance *trace, + struct osnoise_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, 
"min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample); + + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + if (data->hist[cpu].count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_sample / data->hist[cpu].count); + else + trace_seq_printf(trace->seq, " - "); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample); + + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_print_stats - print data for all CPUs + */ +static void +osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool) +{ + struct osnoise_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + osnoise_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + total += data->hist[cpu].samples[bucket]; + trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]); + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + 
trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].count) + continue; + + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].samples[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + osnoise_print_summary(params, trace, data); +} + +/* + * osnoise_hist_usage - prints osnoise hist usage message + */ +static void osnoise_hist_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + "", + " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -e/--entries N: set the number of entries of the histogram (default 256)", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with 
prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct osnoise_hist_params +*osnoise_hist_parse_args(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"bucket-size", required_argument, 0, 'b'}, + {"entries", required_argument, 0, 'e'}, + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {"no-header", no_argument, 0, '0'}, + {"no-summary", no_argument, 0, '1'}, + {"no-index", no_argument, 0, '2'}, + {"with-zeros", no_argument, 0, '3'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123", + long_options, &option_index); + + /* detect the end of the options. 
*/ + if (c == -1) + break; + + switch (c) { + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'c': + retval = parse_cpu_list(optarg, &params->monitored_cpus); + if (retval) + osnoise_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_hist_usage("Invalid -D duration\n"); + break; + case 'e': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + osnoise_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + osnoise_hist_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_hist_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, &params->sched_param); + if (retval == -1) + osnoise_hist_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_hist_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + case '0': /* no header */ + params->no_header = 1; + break; + case '1': /* no summary */ + params->no_summary = 1; + break; + case '2': /* no index */ + params->no_index = 1; + break; + case '3': /* with zeros */ + params->with_zeros = 1; + break; + default: + osnoise_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + 
exit(EXIT_FAILURE); + } + + if (params->no_index && !params->with_zeros) + osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense"); + + return params; +} + +/* + * osnoise_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_hist - initialize a osnoise hist tool with parameters + */ +static struct osnoise_tool +*osnoise_init_hist(struct osnoise_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_hist"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_hist_set_signals - handles the signal to stop the tool + */ +static void 
+osnoise_hist_set_signals(struct osnoise_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int osnoise_hist_main(int argc, char *argv[]) +{ + struct osnoise_hist_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = osnoise_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = osnoise_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_destroy; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_destroy; + } + + retval = osnoise_init_trace_hist(tool); + if (retval) + goto out_destroy; + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + osnoise_read_trace_hist(tool); + + osnoise_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" 
Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + osnoise_free_histogram(tool->data); +out_destroy: + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c new file mode 100644 index 000000000000..332b2ac205fc --- /dev/null +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -0,0 +1,579 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "osnoise.h" +#include "utils.h" + +/* + * osnoise top parameters + */ +struct osnoise_top_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + unsigned long long period; + long long stop_us; + long long stop_total_us; + int sleep_time; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct osnoise_top_cpu { + unsigned long long sum_runtime; + unsigned long long sum_noise; + unsigned long long max_noise; + unsigned long long max_sample; + + unsigned long long hw_count; + unsigned long long nmi_count; + unsigned long long irq_count; + unsigned long long softirq_count; + unsigned long long thread_count; + + int sum_cycles; +}; + +struct osnoise_top_data { + struct osnoise_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * osnoise_free_top - free runtime data + */ +static void +osnoise_free_top(struct osnoise_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * osnoise_alloc_histogram - alloc runtime data + */ +static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus) +{ + struct osnoise_top_data *data; + + data = calloc(1, sizeof(*data)); + if 
(!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + return data; + +cleanup: + osnoise_free_top(data); + return NULL; +} + +/* + * osnoise_top_handler - this is the handler for osnoise tracer events + */ +static int +osnoise_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + struct osnoise_tool *tool; + unsigned long long val; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + cpu_data->sum_cycles++; + + tep_get_field_val(s, event, "runtime", record, &val, 1); + update_sum(&cpu_data->sum_runtime, &val); + + tep_get_field_val(s, event, "noise", record, &val, 1); + update_max(&cpu_data->max_noise, &val); + update_sum(&cpu_data->sum_noise, &val); + + tep_get_field_val(s, event, "max_sample", record, &val, 1); + update_max(&cpu_data->max_sample, &val); + + tep_get_field_val(s, event, "hw_count", record, &val, 1); + update_sum(&cpu_data->hw_count, &val); + + tep_get_field_val(s, event, "nmi_count", record, &val, 1); + update_sum(&cpu_data->nmi_count, &val); + + tep_get_field_val(s, event, "irq_count", record, &val, 1); + update_sum(&cpu_data->irq_count, &val); + + tep_get_field_val(s, event, "softirq_count", record, &val, 1); + update_sum(&cpu_data->softirq_count, &val); + + tep_get_field_val(s, event, "thread_count", record, &val, 1); + update_sum(&cpu_data->thread_count, &val); + + return 0; +} + +/* + * osnoise_top_header - print the header of the tool output + */ +static void osnoise_top_header(struct osnoise_tool *top) +{ + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + 
trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Operating System Noise"); + trace_seq_printf(s, " "); + trace_seq_printf(s, " "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "duration: %9s | time is in us\n", duration); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU Period Runtime "); + trace_seq_printf(s, " Noise "); + trace_seq_printf(s, " %% CPU Aval "); + trace_seq_printf(s, " Max Noise Max Single "); + trace_seq_printf(s, " HW NMI IRQ Softirq Thread"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * osnoise_top_print - prints the output of a given CPU + */ +static void osnoise_top_print(struct osnoise_tool *tool, int cpu) +{ + struct trace_seq *s = tool->trace.seq; + struct osnoise_top_cpu *cpu_data; + struct osnoise_top_data *data; + int percentage; + int decimal; + + data = tool->data; + cpu_data = &data->cpu_data[cpu]; + + if (!cpu_data->sum_runtime) + return; + + percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000) + / cpu_data->sum_runtime; + decimal = percentage % 100000; + percentage = percentage / 100000; + + trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime); + trace_seq_printf(s, "%12llu ", cpu_data->sum_noise); + trace_seq_printf(s, " %3d.%05d", percentage, decimal); + trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample); + + trace_seq_printf(s, "%12llu ", cpu_data->hw_count); + trace_seq_printf(s, "%12llu ", cpu_data->nmi_count); + trace_seq_printf(s, "%12llu ", cpu_data->irq_count); + trace_seq_printf(s, "%12llu ", cpu_data->softirq_count); + trace_seq_printf(s, "%12llu\n", cpu_data->thread_count); +} + +/* + * osnoise_print_stats - print data for all cpus + */ +static void 
+osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + osnoise_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + osnoise_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * osnoise_top_usage - prints osnoise top usage message + */ +void osnoise_top_usage(char *usage) +{ + int i; + + static const char * const msg[] = { + " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: osnoise period in us", + " -r/--runtime us: osnoise runtime in us", + " -s/--stop us: stop trace if a single sample is higher than the argument in us", + " -S/--stop-total us: stop trace if the total sample is higher than the argument in us", + " -c/--cpus cpu-list: list of cpus to run osnoise threads", + " -d/--duration time[s|m|h|d]: duration of the session", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * osnoise_top_parse_args - allocs, parse and 
fill the cmd line parameters + */ +struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) +{ + struct osnoise_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"runtime", required_argument, 0, 'r'}, + {"stop", required_argument, 0, 's'}, + {"stop-total", required_argument, 0, 'S'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::", + long_options, &option_index); + + /* Detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + osnoise_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + osnoise_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + osnoise_top_usage(NULL); + break; + case 'p': + params->period = get_llong_from_str(optarg); + if (params->period > 10000000) + osnoise_top_usage("Period longer than 10 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + osnoise_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 'r': + params->runtime = get_llong_from_str(optarg); + if (params->runtime < 100) + osnoise_top_usage("Runtime shorter than 100 us\n"); + break; + case 's': + params->stop_us = get_llong_from_str(optarg); + break; + case 'S': + 
params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "osnoise_trace.txt"; + break; + default: + osnoise_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("osnoise needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * osnoise_top_apply_config - apply the top configs to the initialized tool + */ +static int +osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->runtime || params->period) { + retval = osnoise_set_runtime_period(tool->context, + params->runtime, + params->period); + if (retval) { + err_msg("Failed to set runtime and/or period\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * osnoise_init_top - initialize a osnoise top tool with parameters + */ +struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("osnoise_top"); + if (!tool) + return NULL; + + tool->data = osnoise_alloc_top(nr_cpus); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise", + osnoise_top_handler, NULL); + + return tool; + +out_err: + 
osnoise_free_top(tool->data); + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * osnoise_top_set_signals - handles the signal to stop the tool + */ +static void osnoise_top_set_signals(struct osnoise_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int osnoise_top_main(int argc, char **argv) +{ + struct osnoise_top_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = osnoise_top_parse_args(argc, argv); + if (!params) + exit(1); + + tool = osnoise_init_top(params); + if (!tool) { + err_msg("Could not init osnoise top\n"); + goto out_exit; + } + + retval = osnoise_top_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &tool->trace; + + retval = enable_osnoise(trace); + if (retval) { + err_msg("Failed to enable osnoise tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("osnoise/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("osnoise"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + osnoise_top_set_signals(params); + + do { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + osnoise_print_stats(params, tool); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + } while (!stop_tracing); + + 
/*
 * run_command - try to run a rtla tool command
 *
 * The word at argv[start_position] selects the tool; the tool receives the
 * argument vector starting at that word.
 *
 * Returns 1 when a tool was dispatched and 0 when the word names no tool.
 * The tool's main will generally not return as they should call exit().
 */
int run_command(int argc, char **argv, int start_position)
{
	char **tool_argv = &argv[start_position];
	int tool_argc = argc - start_position;
	const char *command = tool_argv[0];

	if (!strcmp(command, "osnoise"))
		osnoise_main(tool_argc, tool_argv);
	else if (!strcmp(command, "timerlat"))
		timerlat_main(tool_argc, tool_argv);
	else
		return 0;

	return 1;
}
*/ + retval = run_command(argc, argv, 0); + if (retval) + exit(0); + + if (argc < 2) + goto usage; + + if (strcmp(argv[1], "-h") == 0) { + rtla_usage(); + exit(0); + } else if (strcmp(argv[1], "--help") == 0) { + rtla_usage(); + exit(0); + } + + retval = run_command(argc, argv, 1); + if (retval) + exit(0); + +usage: + rtla_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c new file mode 100644 index 000000000000..97abbf494fee --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> + +#include "timerlat.h" + +static void timerlat_usage(void) +{ + int i; + + static const char * const msg[] = { + "", + "timerlat version " VERSION, + "", + " usage: [rtla] timerlat [MODE] ...", + "", + " modes:", + " top - prints the summary from timerlat tracer", + " hist - prints a histogram of timer latencies", + "", + "if no MODE is given, the top mode is called, passing the arguments", + NULL, + }; + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +int timerlat_main(int argc, char *argv[]) +{ + if (argc == 0) + goto usage; + + /* + * if timerlat was called without any argument, run the + * default cmdline. 
+ */ + if (argc == 1) { + timerlat_top_main(argc, argv); + exit(0); + } + + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { + timerlat_usage(); + exit(0); + } else if (strncmp(argv[1], "-", 1) == 0) { + /* the user skipped the tool, call the default one */ + timerlat_top_main(argc, argv); + exit(0); + } else if (strcmp(argv[1], "top") == 0) { + timerlat_top_main(argc-1, &argv[1]); + exit(0); + } else if (strcmp(argv[1], "hist") == 0) { + timerlat_hist_main(argc-1, &argv[1]); + exit(0); + } + +usage: + timerlat_usage(); + exit(1); +} diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h new file mode 100644 index 000000000000..88561bfd14f3 --- /dev/null +++ b/tools/tracing/rtla/src/timerlat.h @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +int timerlat_hist_main(int argc, char *argv[]); +int timerlat_top_main(int argc, char *argv[]); +int timerlat_main(int argc, char *argv[]); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c new file mode 100644 index 000000000000..235f9620ef3d --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_hist_params { + char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int set_sched; + struct sched_attr sched_param; + + char no_irq; + char no_thread; + char no_header; + char no_summary; + char no_index; + char with_zeros; + int bucket_size; + int entries; +}; + 
/*
 * Histogram and summary data of a single CPU. The irq and thread arrays hold
 * entries + 1 counters each, as allocated by timerlat_alloc_histogram().
 */
struct timerlat_hist_cpu {
	int			*irq;
	int			*thread;

	int			irq_count;
	int			thread_count;

	unsigned long long	min_irq;
	unsigned long long	sum_irq;
	unsigned long long	max_irq;

	unsigned long long	min_thread;
	unsigned long long	sum_thread;
	unsigned long long	max_thread;
};

/*
 * Runtime data of the tool: one struct timerlat_hist_cpu per CPU plus the
 * histogram geometry they share.
 */
struct timerlat_hist_data {
	struct timerlat_hist_cpu	*hist;
	int				entries;
	int				bucket_size;
	int				nr_cpus;
};

/*
 * timerlat_free_histogram - free runtime data
 *
 * Accepts NULL and partially built objects, so it can serve as the cleanup
 * path of timerlat_alloc_histogram() whatever allocation failed.
 */
static void
timerlat_free_histogram(struct timerlat_hist_data *data)
{
	int cpu;

	if (!data)
		return;

	/* the per-CPU array itself may be missing if its allocation failed */
	if (data->hist) {
		/* one histogram for IRQ and one for thread, per CPU */
		for (cpu = 0; cpu < data->nr_cpus; cpu++) {
			free(data->hist[cpu].irq);
			free(data->hist[cpu].thread);
		}

		/* one set of histograms per CPU */
		free(data->hist);
	}

	free(data);
}

/*
 * timerlat_alloc_histogram - alloc runtime data
 *
 * Allocates zeroed histograms for @nr_cpus CPUs, each with @entries buckets
 * and one extra slot, and starts every minimum at the largest representable
 * value so that the first sample always replaces it.
 *
 * Returns the new object, or NULL on invalid sizes or allocation failure.
 */
static struct timerlat_hist_data
*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size)
{
	struct timerlat_hist_data *data;
	size_t slots;
	int cpu;

	/* negative counts would become huge sizes once converted to size_t */
	if (nr_cpus < 1 || entries < 0)
		return NULL;

	data = calloc(1, sizeof(*data));
	if (!data)
		return NULL;

	data->entries = entries;
	data->bucket_size = bucket_size;
	data->nr_cpus = nr_cpus;

	/* one set of histograms per CPU; calloc() checks the product for overflow */
	data->hist = calloc(nr_cpus, sizeof(*data->hist));
	if (!data->hist)
		goto cleanup;

	slots = (size_t)entries + 1;

	/* one histogram for IRQ and one for thread, per cpu */
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		data->hist[cpu].irq = calloc(slots, sizeof(*data->hist->irq));
		if (!data->hist[cpu].irq)
			goto cleanup;
		data->hist[cpu].thread = calloc(slots, sizeof(*data->hist->thread));
		if (!data->hist[cpu].thread)
			goto cleanup;
	}

	/* set the min to max */
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		data->hist[cpu].min_irq = ~0ULL;
		data->hist[cpu].min_thread = ~0ULL;
	}

	return data;

cleanup:
	timerlat_free_histogram(data);
	return NULL;
}
new timerlat occurent on cpu, updating data + */ +static void +timerlat_hist_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + int entries = data->entries; + int bucket; + int *hist; + + if (params->output_divisor) + latency = latency / params->output_divisor; + + if (data->bucket_size) + bucket = latency / data->bucket_size; + + if (!thread) { + hist = data->hist[cpu].irq; + data->hist[cpu].irq_count++; + update_min(&data->hist[cpu].min_irq, &latency); + update_sum(&data->hist[cpu].sum_irq, &latency); + update_max(&data->hist[cpu].max_irq, &latency); + } else { + hist = data->hist[cpu].thread; + data->hist[cpu].thread_count++; + update_min(&data->hist[cpu].min_thread, &latency); + update_sum(&data->hist[cpu].sum_thread, &latency); + update_max(&data->hist[cpu].max_thread, &latency); + } + + if (bucket < entries) + hist[bucket]++; + else + hist[entries]++; +} + +/* + * timerlat_hist_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_hist_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *data) +{ + struct trace_instance *trace = data; + unsigned long long thread, latency; + struct osnoise_tool *tool; + int cpu = record->cpu; + + tool = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_hist_update(tool, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_hist_header - print the header of the tracer to the output + */ +static void timerlat_hist_header(struct osnoise_tool *tool) +{ + struct timerlat_hist_params *params = tool->params; + struct timerlat_hist_data *data = tool->data; + struct trace_seq *s = tool->trace.seq; + char duration[26]; + int cpu; + + if (params->no_header) + return; + + 
get_duration(tool->start_time, duration, sizeof(duration)); + trace_seq_printf(s, "# RTLA timerlat histogram\n"); + trace_seq_printf(s, "# Time unit is %s (%s)\n", + params->output_divisor == 1 ? "nanoseconds" : "microseconds", + params->output_divisor == 1 ? "ns" : "us"); + + trace_seq_printf(s, "# Duration: %s\n", duration); + + if (!params->no_index) + trace_seq_printf(s, "Index"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(s, " IRQ-%03d", cpu); + + if (!params->no_thread) + trace_seq_printf(s, " Thr-%03d", cpu); + } + trace_seq_printf(s, "\n"); + + + trace_seq_do_printf(s); + trace_seq_reset(s); +} + +/* + * timerlat_print_summary - print the summary of the hist data to the output + */ +static void +timerlat_print_summary(struct timerlat_hist_params *params, + struct trace_instance *trace, + struct timerlat_hist_data *data) +{ + int cpu; + + if (params->no_summary) + return; + + if (!params->no_index) + trace_seq_printf(trace->seq, "count:"); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq_count); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread_count); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "min: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].min_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, 
"%9llu ", + data->hist[cpu].min_thread); + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "avg: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + if (data->hist[cpu].irq_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_irq / data->hist[cpu].irq_count); + else + trace_seq_printf(trace->seq, " - "); + } + + if (!params->no_thread) { + if (data->hist[cpu].thread_count) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].sum_thread / data->hist[cpu].thread_count); + else + trace_seq_printf(trace->seq, " - "); + } + } + trace_seq_printf(trace->seq, "\n"); + + if (!params->no_index) + trace_seq_printf(trace->seq, "max: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_irq); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9llu ", + data->hist[cpu].max_thread); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_print_stats - print data for all CPUs + */ +static void +timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool) +{ + struct timerlat_hist_data *data = tool->data; + struct trace_instance *trace = &tool->trace; + int bucket, cpu; + int total; + + timerlat_hist_header(tool); + + for (bucket = 0; bucket < data->entries; bucket++) { + total = 0; + + if (!params->no_index) + trace_seq_printf(trace->seq, "%-6d", + bucket * data->bucket_size); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if 
(!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) { + total += data->hist[cpu].irq[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[bucket]); + } + + if (!params->no_thread) { + total += data->hist[cpu].thread[bucket]; + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[bucket]); + } + + } + + if (total == 0 && !params->with_zeros) { + trace_seq_reset(trace->seq); + continue; + } + + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + } + + if (!params->no_index) + trace_seq_printf(trace->seq, "over: "); + + for (cpu = 0; cpu < data->nr_cpus; cpu++) { + if (params->cpus && !params->monitored_cpus[cpu]) + continue; + + if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) + continue; + + if (!params->no_irq) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].irq[data->entries]); + + if (!params->no_thread) + trace_seq_printf(trace->seq, "%9d ", + data->hist[cpu].thread[data->entries]); + } + trace_seq_printf(trace->seq, "\n"); + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); + + timerlat_print_summary(params, trace, data); +} + +/* + * timerlat_hist_usage - prints timerlat top usage message + */ +static void timerlat_hist_usage(char *usage) +{ + int i; + + char *msg[] = { + "", + " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", + " [--no-index] [--with-zeros]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run 
the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -b/--bucket-size N: set the histogram bucket size (default 1)", + " -e/--entries N: set the number of entries of the histogram (default 256)", + " --no-irq: ignore IRQ latencies", + " --no-thread: ignore thread latencies", + " --no-header: do not print header", + " --no-summary: do not print summary", + " --no-index: do not print index", + " --with-zeros: print zero only entries", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_hist_params +*timerlat_hist_parse_args(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + params->bucket_size = 1; + params->entries = 256; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"bucket-size", required_argument, 0, 'b'}, + {"debug", no_argument, 0, 'D'}, + {"entries", required_argument, 0, 'e'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", 
no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {"no-irq", no_argument, 0, '0'}, + {"no-thread", no_argument, 0, '1'}, + {"no-header", no_argument, 0, '2'}, + {"no-summary", no_argument, 0, '3'}, + {"no-index", no_argument, 0, '4'}, + {"with-zeros", no_argument, 0, '5'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345", + long_options, &option_index); + + /* detect the end of the options. */ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_hist_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'b': + params->bucket_size = get_llong_from_str(optarg); + if ((params->bucket_size == 0) || (params->bucket_size >= 1000000)) + timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n"); + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_hist_usage("Invalid -D duration\n"); + break; + case 'e': + params->entries = get_llong_from_str(optarg); + if ((params->entries < 10) || (params->entries > 9999999)) + timerlat_hist_usage("Entries must be > 10 and < 9999999\n"); + break; + case 'h': + case '?': + timerlat_hist_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_hist_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_hist_usage("Invalid -P priority"); + params->set_sched = 1; 
+ break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + case '0': /* no irq */ + params->no_irq = 1; + break; + case '1': /* no thread */ + params->no_thread = 1; + break; + case '2': /* no header */ + params->no_header = 1; + break; + case '3': /* no summary */ + params->no_summary = 1; + break; + case '4': /* no index */ + params->no_index = 1; + break; + case '5': /* with zeros */ + params->with_zeros = 1; + break; + default: + timerlat_hist_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + if (params->no_irq && params->no_thread) + timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here"); + + if (params->no_index && !params->with_zeros) + timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense"); + + return params; +} + +/* + * timerlat_hist_apply_config - apply the hist configs to the initialized tool + */ +static int +timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(tool->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(tool->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; + } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(tool->context, 
params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + if (params->print_stack) { + retval = osnoise_set_print_stack(tool->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_hist - initialize a timerlat hist tool with parameters + */ +static struct osnoise_tool +*timerlat_init_hist(struct timerlat_hist_params *params) +{ + struct osnoise_tool *tool; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + tool = osnoise_init_tool("timerlat_hist"); + if (!tool) + return NULL; + + tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size); + if (!tool->data) + goto out_err; + + tool->params = params; + + tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat", + timerlat_hist_handler, tool); + + return tool; + +out_err: + osnoise_destroy_tool(tool); + return NULL; +} + +static int stop_tracing; +static void stop_hist(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_hist_set_signals - handles the signal to stop the tool + */ +static void +timerlat_hist_set_signals(struct timerlat_hist_params *params) +{ + signal(SIGINT, stop_hist); + if (params->duration) { + signal(SIGALRM, stop_hist); + alarm(params->duration); + } +} + +int timerlat_hist_main(int argc, char *argv[]) +{ + struct timerlat_hist_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *tool; + int return_value = 1; + int retval; + + params = timerlat_hist_parse_args(argc, argv); + if (!params) + exit(1); + + tool = timerlat_init_hist(params); + if (!tool) { + err_msg("Could not init osnoise hist\n"); + goto out_exit; + } + + retval = timerlat_hist_apply_config(tool, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_hist; + } + + trace = &tool->trace; + + retval = enable_timerlat(trace); + if 
(retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_hist; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_hist; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_hist; + } + trace_instance_start(&record->trace); + } + + tool->start_time = time(NULL); + timerlat_hist_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_hist; + } + + if (!tracefs_trace_is_on(trace->inst)) + break; + }; + + timerlat_print_stats(params, tool); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_hist: + timerlat_free_histogram(tool->data); + osnoise_destroy_tool(tool); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c new file mode 100644 index 000000000000..1ebd5291539c --- /dev/null +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#include <stdio.h> +#include <time.h> + +#include "utils.h" +#include "osnoise.h" +#include "timerlat.h" + +struct timerlat_top_params { + 
char *cpus; + char *monitored_cpus; + char *trace_output; + unsigned long long runtime; + long long stop_us; + long long stop_total_us; + long long timerlat_period_us; + long long print_stack; + int sleep_time; + int output_divisor; + int duration; + int quiet; + int set_sched; + struct sched_attr sched_param; +}; + +struct timerlat_top_cpu { + int irq_count; + int thread_count; + + unsigned long long cur_irq; + unsigned long long min_irq; + unsigned long long sum_irq; + unsigned long long max_irq; + + unsigned long long cur_thread; + unsigned long long min_thread; + unsigned long long sum_thread; + unsigned long long max_thread; +}; + +struct timerlat_top_data { + struct timerlat_top_cpu *cpu_data; + int nr_cpus; +}; + +/* + * timerlat_free_top - free runtime data + */ +static void +timerlat_free_top(struct timerlat_top_data *data) +{ + free(data->cpu_data); + free(data); +} + +/* + * timerlat_alloc_histogram - alloc runtime data + */ +static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus) +{ + struct timerlat_top_data *data; + int cpu; + + data = calloc(1, sizeof(*data)); + if (!data) + return NULL; + + data->nr_cpus = nr_cpus; + + /* one set of histograms per CPU */ + data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus); + if (!data->cpu_data) + goto cleanup; + + /* set the min to max */ + for (cpu = 0; cpu < nr_cpus; cpu++) { + data->cpu_data[cpu].min_irq = ~0; + data->cpu_data[cpu].min_thread = ~0; + } + + return data; + +cleanup: + timerlat_free_top(data); + return NULL; +} + +/* + * timerlat_hist_update - record a new timerlat occurent on cpu, updating data + */ +static void +timerlat_top_update(struct osnoise_tool *tool, int cpu, + unsigned long long thread, + unsigned long long latency) +{ + struct timerlat_top_data *data = tool->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + + if (!thread) { + cpu_data->irq_count++; + cpu_data->cur_irq = latency; + update_min(&cpu_data->min_irq, &latency); + 
update_sum(&cpu_data->sum_irq, &latency); + update_max(&cpu_data->max_irq, &latency); + } else { + cpu_data->thread_count++; + cpu_data->cur_thread = latency; + update_min(&cpu_data->min_thread, &latency); + update_sum(&cpu_data->sum_thread, &latency); + update_max(&cpu_data->max_thread, &latency); + } +} + +/* + * timerlat_top_handler - this is the handler for timerlat tracer events + */ +static int +timerlat_top_handler(struct trace_seq *s, struct tep_record *record, + struct tep_event *event, void *context) +{ + struct trace_instance *trace = context; + unsigned long long latency, thread; + struct osnoise_tool *top; + int cpu = record->cpu; + + top = container_of(trace, struct osnoise_tool, trace); + + tep_get_field_val(s, event, "context", record, &thread, 1); + tep_get_field_val(s, event, "timer_latency", record, &latency, 1); + + timerlat_top_update(top, cpu, thread, latency); + + return 0; +} + +/* + * timerlat_top_header - print the header of the tool output + */ +static void timerlat_top_header(struct osnoise_tool *top) +{ + struct timerlat_top_params *params = top->params; + struct trace_seq *s = top->trace.seq; + char duration[26]; + + get_duration(top->start_time, duration, sizeof(duration)); + + trace_seq_printf(s, "\033[2;37;40m"); + trace_seq_printf(s, " Timer Latency "); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); + + trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration, + params->output_divisor == 1 ? "ns" : "us", + params->output_divisor == 1 ? 
"ns" : "us"); + + trace_seq_printf(s, "\033[2;30;47m"); + trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max"); + trace_seq_printf(s, "\033[0;0;0m"); + trace_seq_printf(s, "\n"); +} + +/* + * timerlat_top_print - prints the output of a given CPU + */ +static void timerlat_top_print(struct osnoise_tool *top, int cpu) +{ + + struct timerlat_top_params *params = top->params; + struct timerlat_top_data *data = top->data; + struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu]; + int divisor = params->output_divisor; + struct trace_seq *s = top->trace.seq; + + if (divisor == 0) + return; + + /* + * Skip if no data is available: is this cpu offline? + */ + if (!cpu_data->irq_count && !cpu_data->thread_count) + return; + + /* + * Unless trace is being lost, IRQ counter is always the max. + */ + trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + + if (!cpu_data->irq_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - |"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); + trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor); + trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor); + } + + if (!cpu_data->thread_count) { + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " - "); + trace_seq_printf(s, " -\n"); + } else { + trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); + trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); + trace_seq_printf(s, "%9llu ", + (cpu_data->sum_thread / cpu_data->thread_count) / divisor); + trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor); + } +} + +/* + * clear_terminal - clears the output terminal + */ +static void clear_terminal(struct trace_seq *seq) +{ + if (!config_debug) + trace_seq_printf(seq, "\033c"); +} + +/* + * 
timerlat_print_stats - print data for all cpus + */ +static void +timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top) +{ + struct trace_instance *trace = &top->trace; + static int nr_cpus = -1; + int i; + + if (nr_cpus == -1) + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + if (!params->quiet) + clear_terminal(trace->seq); + + timerlat_top_header(top); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !params->monitored_cpus[i]) + continue; + timerlat_top_print(top, i); + } + + trace_seq_do_printf(trace->seq); + trace_seq_reset(trace->seq); +} + +/* + * timerlat_top_usage - prints timerlat top usage message + */ +static void timerlat_top_usage(char *usage) +{ + int i; + + static const char *const msg[] = { + "", + " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\", + " [-c cpu-list] [-P priority]", + "", + " -h/--help: print this menu", + " -p/--period us: timerlat period in us", + " -i/--irq us: stop trace if the irq latency is higher than the argument in us", + " -T/--thread us: stop trace if the thread latency is higher than the argument in us", + " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us", + " -c/--cpus cpus: run the tracer only on the given cpus", + " -d/--duration time[m|h|d]: duration of the session in seconds", + " -D/--debug: print debug info", + " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]", + " -n/--nano: display data in nanoseconds", + " -q/--quiet print only a summary at the end", + " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters", + " o:prio - use SCHED_OTHER with prio", + " r:prio - use SCHED_RR with prio", + " f:prio - use SCHED_FIFO with prio", + " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period", + " in nanoseconds", + NULL, + }; + + if (usage) + fprintf(stderr, "%s\n", usage); + + fprintf(stderr, "rtla 
timerlat top: a per-cpu summary of the timer latency (version %s)\n", + VERSION); + + for (i = 0; msg[i]; i++) + fprintf(stderr, "%s\n", msg[i]); + exit(1); +} + +/* + * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters + */ +static struct timerlat_top_params +*timerlat_top_parse_args(int argc, char **argv) +{ + struct timerlat_top_params *params; + int retval; + int c; + + params = calloc(1, sizeof(*params)); + if (!params) + exit(1); + + /* display data in microseconds */ + params->output_divisor = 1000; + + while (1) { + static struct option long_options[] = { + {"cpus", required_argument, 0, 'c'}, + {"debug", no_argument, 0, 'D'}, + {"duration", required_argument, 0, 'd'}, + {"help", no_argument, 0, 'h'}, + {"irq", required_argument, 0, 'i'}, + {"nano", no_argument, 0, 'n'}, + {"period", required_argument, 0, 'p'}, + {"priority", required_argument, 0, 'P'}, + {"quiet", no_argument, 0, 'q'}, + {"stack", required_argument, 0, 's'}, + {"thread", required_argument, 0, 'T'}, + {"trace", optional_argument, 0, 't'}, + {0, 0, 0, 0} + }; + + /* getopt_long stores the option index here. */ + int option_index = 0; + + c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:", + long_options, &option_index); + + /* detect the end of the options. 
*/ + if (c == -1) + break; + + switch (c) { + case 'c': + retval = parse_cpu_list(optarg, ¶ms->monitored_cpus); + if (retval) + timerlat_top_usage("\nInvalid -c cpu list\n"); + params->cpus = optarg; + break; + case 'D': + config_debug = 1; + break; + case 'd': + params->duration = parse_seconds_duration(optarg); + if (!params->duration) + timerlat_top_usage("Invalid -D duration\n"); + break; + case 'h': + case '?': + timerlat_top_usage(NULL); + break; + case 'i': + params->stop_us = get_llong_from_str(optarg); + break; + case 'n': + params->output_divisor = 1; + break; + case 'p': + params->timerlat_period_us = get_llong_from_str(optarg); + if (params->timerlat_period_us > 1000000) + timerlat_top_usage("Period longer than 1 s\n"); + break; + case 'P': + retval = parse_prio(optarg, ¶ms->sched_param); + if (retval == -1) + timerlat_top_usage("Invalid -P priority"); + params->set_sched = 1; + break; + case 'q': + params->quiet = 1; + break; + case 's': + params->print_stack = get_llong_from_str(optarg); + break; + case 'T': + params->stop_total_us = get_llong_from_str(optarg); + break; + case 't': + if (optarg) + /* skip = */ + params->trace_output = &optarg[1]; + else + params->trace_output = "timerlat_trace.txt"; + break; + default: + timerlat_top_usage("Invalid option"); + } + } + + if (geteuid()) { + err_msg("rtla needs root permission\n"); + exit(EXIT_FAILURE); + } + + return params; +} + +/* + * timerlat_top_apply_config - apply the top configs to the initialized tool + */ +static int +timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params) +{ + int retval; + + if (!params->sleep_time) + params->sleep_time = 1; + + if (params->cpus) { + retval = osnoise_set_cpus(top->context, params->cpus); + if (retval) { + err_msg("Failed to apply CPUs config\n"); + goto out_err; + } + } + + if (params->stop_us) { + retval = osnoise_set_stop_us(top->context, params->stop_us); + if (retval) { + err_msg("Failed to set stop us\n"); + goto out_err; 
+ } + } + + if (params->stop_total_us) { + retval = osnoise_set_stop_total_us(top->context, params->stop_total_us); + if (retval) { + err_msg("Failed to set stop total us\n"); + goto out_err; + } + } + + + if (params->timerlat_period_us) { + retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us); + if (retval) { + err_msg("Failed to set timerlat period\n"); + goto out_err; + } + } + + + if (params->print_stack) { + retval = osnoise_set_print_stack(top->context, params->print_stack); + if (retval) { + err_msg("Failed to set print stack\n"); + goto out_err; + } + } + + return 0; + +out_err: + return -1; +} + +/* + * timerlat_init_top - initialize a timerlat top tool with parameters + */ +static struct osnoise_tool +*timerlat_init_top(struct timerlat_top_params *params) +{ + struct osnoise_tool *top; + int nr_cpus; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + top = osnoise_init_tool("timerlat_top"); + if (!top) + return NULL; + + top->data = timerlat_alloc_top(nr_cpus); + if (!top->data) + goto out_err; + + top->params = params; + + tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat", + timerlat_top_handler, top); + + return top; + +out_err: + osnoise_destroy_tool(top); + return NULL; +} + +static int stop_tracing; +static void stop_top(int sig) +{ + stop_tracing = 1; +} + +/* + * timerlat_top_set_signals - handles the signal to stop the tool + */ +static void +timerlat_top_set_signals(struct timerlat_top_params *params) +{ + signal(SIGINT, stop_top); + if (params->duration) { + signal(SIGALRM, stop_top); + alarm(params->duration); + } +} + +int timerlat_top_main(int argc, char *argv[]) +{ + struct timerlat_top_params *params; + struct trace_instance *trace; + struct osnoise_tool *record; + struct osnoise_tool *top; + int return_value = 1; + int retval; + + params = timerlat_top_parse_args(argc, argv); + if (!params) + exit(1); + + top = timerlat_init_top(params); + if (!top) { + err_msg("Could not init osnoise top\n"); + 
goto out_exit; + } + + retval = timerlat_top_apply_config(top, params); + if (retval) { + err_msg("Could not apply config\n"); + goto out_top; + } + + trace = &top->trace; + + retval = enable_timerlat(trace); + if (retval) { + err_msg("Failed to enable timerlat tracer\n"); + goto out_top; + } + + if (params->set_sched) { + retval = set_comm_sched_attr("timerlat/", ¶ms->sched_param); + if (retval) { + err_msg("Failed to set sched parameters\n"); + goto out_top; + } + } + + trace_instance_start(trace); + + if (params->trace_output) { + record = osnoise_init_trace_tool("timerlat"); + if (!record) { + err_msg("Failed to enable the trace instance\n"); + goto out_top; + } + trace_instance_start(&record->trace); + } + + top->start_time = time(NULL); + timerlat_top_set_signals(params); + + while (!stop_tracing) { + sleep(params->sleep_time); + + retval = tracefs_iterate_raw_events(trace->tep, + trace->inst, + NULL, + 0, + collect_registered_events, + trace); + if (retval < 0) { + err_msg("Error iterating on events\n"); + goto out_top; + } + + if (!params->quiet) + timerlat_print_stats(params, top); + + if (!tracefs_trace_is_on(trace->inst)) + break; + + }; + + timerlat_print_stats(params, top); + + return_value = 0; + + if (!tracefs_trace_is_on(trace->inst)) { + printf("rtla timelat hit stop tracing\n"); + if (params->trace_output) { + printf(" Saving trace to %s\n", params->trace_output); + save_trace_to_file(record->trace.inst, params->trace_output); + } + } + +out_top: + timerlat_free_top(top->data); + osnoise_destroy_tool(top); + if (params->trace_output) + osnoise_destroy_tool(record); + free(params); +out_exit: + exit(return_value); +} diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c new file mode 100644 index 000000000000..107a0c6387f7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/sendfile.h> +#include <tracefs.h> +#include <signal.h> +#include 
<stdlib.h> +#include <unistd.h> +#include <errno.h> + +#include "trace.h" +#include "utils.h" + +/* + * enable_tracer_by_name - enable a tracer on the given instance + */ +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name) +{ + enum tracefs_tracers tracer; + int retval; + + tracer = TRACEFS_TRACER_CUSTOM; + + debug_msg("enabling %s tracer\n", tracer_name); + + retval = tracefs_tracer_set(inst, tracer, tracer_name); + if (retval < 0) { + if (errno == ENODEV) + err_msg("tracer %s not found!\n", tracer_name); + + err_msg("failed to enable the tracer %s\n", tracer_name); + return -1; + } + + return 0; +} + +/* + * disable_tracer - set nop tracer to the insta + */ +void disable_tracer(struct tracefs_instance *inst) +{ + enum tracefs_tracers t = TRACEFS_TRACER_NOP; + int retval; + + retval = tracefs_tracer_set(inst, t); + if (retval < 0) + err_msg("oops, error disabling tracer\n"); +} + +/* + * create_instance - create a trace instance with *instance_name + */ +struct tracefs_instance *create_instance(char *instance_name) +{ + return tracefs_instance_create(instance_name); +} + +/* + * destroy_instance - remove a trace instance and free the data + */ +void destroy_instance(struct tracefs_instance *inst) +{ + tracefs_instance_destroy(inst); + tracefs_instance_free(inst); +} + +/* + * save_trace_to_file - save the trace output of the instance to the file + */ +int save_trace_to_file(struct tracefs_instance *inst, const char *filename) +{ + const char *file = "trace"; + mode_t mode = 0644; + char buffer[4096]; + int out_fd, in_fd; + int retval = -1; + + in_fd = tracefs_instance_file_open(inst, file, O_RDONLY); + if (in_fd < 0) { + err_msg("Failed to open trace file\n"); + return -1; + } + + out_fd = creat(filename, mode); + if (out_fd < 0) { + err_msg("Failed to create output file %s\n", filename); + goto out_close_in; + } + + do { + retval = read(in_fd, buffer, sizeof(buffer)); + if (retval <= 0) + goto out_close; + + retval = write(out_fd, 
buffer, retval); + if (retval < 0) + goto out_close; + } while (retval > 0); + + retval = 0; +out_close: + close(out_fd); +out_close_in: + close(in_fd); + return retval; +} + +/* + * collect_registered_events - call the existing callback function for the event + * + * If an event has a registered callback function, call it. + * Otherwise, ignore the event. + */ +int +collect_registered_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + struct trace_seq *s = trace->seq; + + if (!event->handler) + return 0; + + event->handler(s, record, event, context); + + return 0; +} + +/* + * trace_instance_destroy - destroy and free a rtla trace instance + */ +void trace_instance_destroy(struct trace_instance *trace) +{ + if (trace->inst) { + disable_tracer(trace->inst); + destroy_instance(trace->inst); + } + + if (trace->seq) + free(trace->seq); + + if (trace->tep) + tep_free(trace->tep); +} + +/* + * trace_instance_init - create an rtla trace instance + * + * It is more than the tracefs instance, as it contains other + * things required for the tracing, such as the local events and + * a seq file. + * + * Note that the trace instance is returned disabled. This allows + * the tool to apply some other configs, like setting priority + * to the kernel threads, before starting generating trace entries. + */ +int trace_instance_init(struct trace_instance *trace, char *tool_name) +{ + trace->seq = calloc(1, sizeof(*trace->seq)); + if (!trace->seq) + goto out_err; + + trace_seq_init(trace->seq); + + trace->inst = create_instance(tool_name); + if (!trace->inst) + goto out_err; + + trace->tep = tracefs_local_events(NULL); + if (!trace->tep) + goto out_err; + + /* + * Let the main enable the record after setting some other + * things such as the priority of the tracer's threads. 
+ */ + tracefs_trace_off(trace->inst); + + return 0; + +out_err: + trace_instance_destroy(trace); + return 1; +} + +/* + * trace_instance_start - start tracing a given rtla instance + */ +int trace_instance_start(struct trace_instance *trace) +{ + return tracefs_trace_on(trace->inst); +} diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h new file mode 100644 index 000000000000..0ea1df0ad9a7 --- /dev/null +++ b/tools/tracing/rtla/src/trace.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs.h> +#include <stddef.h> + +struct trace_instance { + struct tracefs_instance *inst; + struct tep_handle *tep; + struct trace_seq *seq; +}; + +int trace_instance_init(struct trace_instance *trace, char *tool_name); +int trace_instance_start(struct trace_instance *trace); +void trace_instance_destroy(struct trace_instance *trace); + +struct trace_seq *get_trace_seq(void); +int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name); +void disable_tracer(struct tracefs_instance *inst); + +int enable_osnoise(struct trace_instance *trace); +int enable_timerlat(struct trace_instance *trace); + +struct tracefs_instance *create_instance(char *instance_name); +void destroy_instance(struct tracefs_instance *inst); + +int save_trace_to_file(struct tracefs_instance *inst, const char *filename); +int collect_registered_events(struct tep_event *tep, struct tep_record *record, + int cpu, void *context); diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c new file mode 100644 index 000000000000..1c9f0eea6166 --- /dev/null +++ b/tools/tracing/rtla/src/utils.c @@ -0,0 +1,433 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> + */ + +#include <proc/readproc.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <sched.h> +#include <stdio.h> + 
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * Writes the time elapsed between @start_time and now into @output as
 * "<days> HH:MM:SS" (days right-aligned in three columns), using at most
 * @output_size bytes including the terminating NUL.
 *
 * The fields are computed arithmetically from the elapsed seconds: a
 * duration is not a calendar time, so running it through localtime() would
 * make the result depend on the local timezone. A start time in the future
 * is reported as a zero duration.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	const long long secs_per_min = 60;
	const long long secs_per_hour = 60 * secs_per_min;
	const long long secs_per_day = 24 * secs_per_hour;
	time_t now = time(NULL);
	long long elapsed;

	elapsed = (long long)difftime(now, start_time);
	if (elapsed < 0)
		elapsed = 0;

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 (int)(elapsed / secs_per_day),
		 (int)((elapsed % secs_per_day) / secs_per_hour),
		 (int)((elapsed % secs_per_hour) / secs_per_min),
		 (int)(elapsed % secs_per_min));
}
+ */ +int parse_cpu_list(char *cpu_list, char **monitored_cpus) +{ + char *mon_cpus; + const char *p; + int end_cpu; + int nr_cpus; + int cpu; + int i; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + mon_cpus = malloc(nr_cpus * sizeof(char)); + memset(mon_cpus, 0, (nr_cpus * sizeof(char))); + + for (p = cpu_list; *p; ) { + cpu = atoi(p); + if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus) + goto err; + + while (isdigit(*p)) + p++; + if (*p == '-') { + p++; + end_cpu = atoi(p); + if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus) + goto err; + while (isdigit(*p)) + p++; + } else + end_cpu = cpu; + + if (cpu == end_cpu) { + debug_msg("cpu_list: adding cpu %d\n", cpu); + mon_cpus[cpu] = 1; + } else { + for (i = cpu; i <= end_cpu; i++) { + debug_msg("cpu_list: adding cpu %d\n", i); + mon_cpus[i] = 1; + } + } + + if (*p == ',') + p++; + } + + *monitored_cpus = mon_cpus; + + return 0; + +err: + debug_msg("Error parsing the cpu list %s", cpu_list); + return 1; +} + +/* + * parse_duration - parse duration with s/m/h/d suffix converting it to seconds + */ +long parse_seconds_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + switch (*end) { + case 's': + case 'S': + break; + case 'm': + case 'M': + t *= 60; + break; + case 'h': + case 'H': + t *= 60 * 60; + break; + + case 'd': + case 'D': + t *= 24 * 60 * 60; + break; + } + } + + return t; +} + +/* + * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds + */ +long parse_ns_duration(char *val) +{ + char *end; + long t; + + t = strtol(val, &end, 10); + + if (end) { + if (!strncmp(end, "ns", 2)) { + return t; + } else if (!strncmp(end, "us", 2)) { + t *= 1000; + return t; + } else if (!strncmp(end, "ms", 2)) { + t *= 1000 * 1000; + return t; + } else if (!strncmp(end, "s", 1)) { + t *= 1000 * 1000 * 1000; + return t; + } + return -1; + } + + return t; +} + +/* + * This is a set of helper functions to use SCHED_DEADLINE. 
+ */ +#ifdef __x86_64__ +# define __NR_sched_setattr 314 +# define __NR_sched_getattr 315 +#elif __i386__ +# define __NR_sched_setattr 351 +# define __NR_sched_getattr 352 +#elif __arm__ +# define __NR_sched_setattr 380 +# define __NR_sched_getattr 381 +#elif __aarch64__ +# define __NR_sched_setattr 274 +# define __NR_sched_getattr 275 +#elif __powerpc__ +# define __NR_sched_setattr 355 +# define __NR_sched_getattr 356 +#elif __s390x__ +# define __NR_sched_setattr 345 +# define __NR_sched_getattr 346 +#endif + +#define SCHED_DEADLINE 6 + +static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, + unsigned int flags) { + return syscall(__NR_sched_setattr, pid, attr, flags); +} + +static inline int sched_getattr(pid_t pid, struct sched_attr *attr, + unsigned int size, unsigned int flags) +{ + return syscall(__NR_sched_getattr, pid, attr, size, flags); +} + +int __set_sched_attr(int pid, struct sched_attr *attr) +{ + int flags = 0; + int retval; + + retval = sched_setattr(pid, attr, flags); + if (retval < 0) { + err_msg("boost_with_deadline failed to boost pid %d: %s\n", + pid, strerror(errno)); + return 1; + } + + return 0; +} +/* + * set_comm_sched_attr - set sched params to threads starting with char *comm + * + * This function uses procps to list the currently running threads and then + * set the sched_attr *attr to the threads that start with char *comm. It is + * mainly used to set the priority to the kernel threads created by the + * tracers. 
+ */ +int set_comm_sched_attr(const char *comm, struct sched_attr *attr) +{ + int flags = PROC_FILLCOM | PROC_FILLSTAT; + PROCTAB *ptp; + proc_t task; + int retval; + + ptp = openproc(flags); + if (!ptp) { + err_msg("error openproc()\n"); + return -ENOENT; + } + + memset(&task, 0, sizeof(task)); + + while (readproc(ptp, &task)) { + retval = strncmp(comm, task.cmd, strlen(comm)); + if (retval) + continue; + retval = __set_sched_attr(task.tid, attr); + if (retval) + goto out_err; + } + + closeproc(ptp); + return 0; + +out_err: + closeproc(ptp); + return 1; +} + +#define INVALID_VAL (~0L) +static long get_long_ns_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = parse_ns_duration(start); + + return val; +} + +static long get_long_after_colon(char *start) +{ + long val = INVALID_VAL; + + /* find the ":" */ + start = strstr(start, ":"); + if (!start) + return -1; + + /* skip ":" */ + start++; + val = get_llong_from_str(start); + + return val; +} + +/* + * parse priority in the format: + * SCHED_OTHER: + * o:<prio> + * O:<prio> + * SCHED_RR: + * r:<prio> + * R:<prio> + * SCHED_FIFO: + * f:<prio> + * F:<prio> + * SCHED_DEADLINE: + * d:runtime:period + * D:runtime:period + */ +int parse_prio(char *arg, struct sched_attr *sched_param) +{ + long prio; + long runtime; + long period; + + memset(sched_param, 0, sizeof(*sched_param)); + sched_param->size = sizeof(*sched_param); + + switch (arg[0]) { + case 'd': + case 'D': + /* d:runtime:period */ + if (strlen(arg) < 4) + return -1; + + runtime = get_long_ns_after_colon(arg); + if (runtime == INVALID_VAL) + return -1; + + period = get_long_ns_after_colon(&arg[2]); + if (period == INVALID_VAL) + return -1; + + if (runtime > period) + return -1; + + sched_param->sched_policy = SCHED_DEADLINE; + sched_param->sched_runtime = runtime; + sched_param->sched_deadline = period; + sched_param->sched_period = period; + 
break; + case 'f': + case 'F': + /* f:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_FIFO)) + return -1; + if (prio > sched_get_priority_max(SCHED_FIFO)) + return -1; + + sched_param->sched_policy = SCHED_FIFO; + sched_param->sched_priority = prio; + break; + case 'r': + case 'R': + /* r:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_RR)) + return -1; + if (prio > sched_get_priority_max(SCHED_RR)) + return -1; + + sched_param->sched_policy = SCHED_RR; + sched_param->sched_priority = prio; + break; + case 'o': + case 'O': + /* o:prio */ + prio = get_long_after_colon(arg); + if (prio == INVALID_VAL) + return -1; + + if (prio < sched_get_priority_min(SCHED_OTHER)) + return -1; + if (prio > sched_get_priority_max(SCHED_OTHER)) + return -1; + + sched_param->sched_policy = SCHED_OTHER; + sched_param->sched_priority = prio; + break; + default: + return -1; + } + return 0; +} diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h new file mode 100644 index 000000000000..9aa962319ca2 --- /dev/null +++ b/tools/tracing/rtla/src/utils.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <time.h> + +/* + * '18446744073709551615\0' + */ +#define BUFF_U64_STR_SIZE 24 + +#define container_of(ptr, type, member)({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)) ; }) + +extern int config_debug; +void debug_msg(const char *fmt, ...); +void err_msg(const char *fmt, ...); + +long parse_seconds_duration(char *val); +void get_duration(time_t start_time, char *output, int output_size); + +int parse_cpu_list(char *cpu_list, char **monitored_cpus); +long long get_llong_from_str(char *start); + +static inline void +update_min(unsigned long long *a, unsigned long long *b) +{ + if (*a > *b) + *a = *b; +} + +static inline 
void +update_max(unsigned long long *a, unsigned long long *b) +{ + if (*a < *b) + *a = *b; +} + +static inline void +update_sum(unsigned long long *a, unsigned long long *b) +{ + *a += *b; +} + +struct sched_attr { + uint32_t size; + uint32_t sched_policy; + uint64_t sched_flags; + int32_t sched_nice; + uint32_t sched_priority; + uint64_t sched_runtime; + uint64_t sched_deadline; + uint64_t sched_period; +}; + +int parse_prio(char *arg, struct sched_attr *sched_param); +int set_comm_sched_attr(const char *comm, struct sched_attr *attr); |