diff options
Diffstat (limited to 'tools/lib/bpf')
-rw-r--r-- | tools/lib/bpf/.gitignore | 4 | ||||
-rw-r--r-- | tools/lib/bpf/Makefile | 83 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.c | 11 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 10 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_core_read.h | 263 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_endian.h | 72 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 47 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_prog_linfo.c | 14 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_tracing.h | 195 | ||||
-rw-r--r-- | tools/lib/bpf/btf.c | 97 | ||||
-rw-r--r-- | tools/lib/bpf/btf.h | 6 | ||||
-rw-r--r-- | tools/lib/bpf/btf_dump.c | 37 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 2104 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 99 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 18 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_internal.h | 79 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_probes.c | 1 | ||||
-rw-r--r-- | tools/lib/bpf/netlink.c | 87 | ||||
-rw-r--r-- | tools/lib/bpf/nlattr.c | 10 | ||||
-rw-r--r-- | tools/lib/bpf/test_libbpf.c (renamed from tools/lib/bpf/test_libbpf.cpp) | 14 | ||||
-rw-r--r-- | tools/lib/bpf/xsk.c | 177 |
21 files changed, 2508 insertions, 920 deletions
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index d9e9dec04605..35bf013e368c 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -3,3 +3,7 @@ libbpf.pc FEATURE-DUMP.libbpf test_libbpf libbpf.so.* +TAGS +tags +cscope.* +/bpf_helper_defs.h diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index c6f94cffe06e..99425d0be6ff 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -8,7 +8,11 @@ LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION))) MAKEFLAGS += --no-print-directory -ifeq ($(srctree),) +# This will work when bpf is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is a ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -52,7 +56,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -110,6 +114,9 @@ override CFLAGS += $(INCLUDES) override CFLAGS += -fvisibility=hidden override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +# flags specific for shared library +SHLIB_FLAGS := -DSHARED + ifeq ($(VERBOSE),1) Q = else @@ -126,36 +133,33 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o +SHARED_OBJDIR := $(OUTPUT)sharedobjs/ +STATIC_OBJDIR := $(OUTPUT)staticobjs/ +BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o +BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o VERSION_SCRIPT := libbpf.map LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE)) -GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ +TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) + +GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) - -CXX_TEST_TARGET = $(OUTPUT)test_libbpf - -ifeq ($(feature-cxx), 1) - CMD_TARGETS += $(CXX_TEST_TARGET) -endif - -TARGETS = $(CMD_TARGETS) +CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) $(OUTPUT)test_libbpf all: fixdep $(Q)$(MAKE) all_cmd all_cmd: $(CMD_TARGETS) check -$(BPF_IN): force elfdep bpfdep +$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -171,21 +175,29 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true - $(Q)$(MAKE) $(build)=libbpf + $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" + +$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h + $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) + +bpf_helper_defs.h: $(srctree)/include/uapi/linux/bpf.h + $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ + --file $(srctree)/include/uapi/linux/bpf.h > bpf_helper_defs.h $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) -$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) - $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) + $(QUIET_LINK)$(CC) $(LDFLAGS) \ + --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) -$(OUTPUT)libbpf.a: $(BPF_IN) +$(OUTPUT)libbpf.a: $(BPF_IN_STATIC) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a - $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ +$(OUTPUT)test_libbpf: test_libbpf.c $(OUTPUT)libbpf.a + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(INCLUDES) $^ -lelf -o $@ $(OUTPUT)libbpf.pc: $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ @@ -197,7 +209,7 @@ check: check_abi check_abi: $(OUTPUT)libbpf.so @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ - echo "Warning: Num of global symbols in $(BPF_IN)" \ + echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \ "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ "Please make sure all LIBBPF_API symbols are" \ @@ -234,13 +246,18 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -install_headers: +install_headers: bpf_helper_defs.h $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ - $(call do_install,xsk.h,$(prefix)/include/bpf,644); + $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \ + $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644); install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ @@ -255,14 +272,15 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ - *.pc LIBBPF-CFLAGS + *.pc LIBBPF-CFLAGS bpf_helper_defs.h \ + $(SHARED_OBJDIR) $(STATIC_OBJDIR) $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -PHONY += force elfdep bpfdep +PHONY += force elfdep bpfdep cscope tags force: elfdep: @@ -271,6 +289,17 @@ elfdep: bpfdep: @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi +cscope: + ls *.c *.h > cscope.files + cscope -b -q -I $(srctree)/include -f cscope.out + +tags: + rm -f TAGS tags + ls *.c *.h | xargs $(TAGS_PROG) -a + # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index cbb933532981..98596e15390f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -189,7 +189,7 @@ static void * alloc_zero_tailing_info(const void *orecord, __u32 cnt, __u32 actual_rec_size, __u32 expected_rec_size) { - __u64 info_len = actual_rec_size * cnt; + __u64 info_len = (__u64)actual_rec_size * cnt; void *info, *nrecord; int i; @@ -228,6 +228,13 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; + if (attr.prog_type == BPF_PROG_TYPE_TRACING) { + attr.attach_btf_id = load_attr->attach_btf_id; + attr.attach_prog_fd = load_attr->attach_prog_fd; + } else { + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = load_attr->kern_version; + } attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); @@ -241,8 +248,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_size = 0; } - attr.kern_version = load_attr->kern_version; - attr.prog_ifindex = load_attr->prog_ifindex; attr.prog_btf_fd = load_attr->prog_btf_fd; attr.func_info_rec_size = load_attr->func_info_rec_size; attr.func_info_cnt = load_attr->func_info_cnt; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 0db01334740f..3c791fa8e68e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -77,8 +77,14 @@ struct bpf_load_program_attr { const struct bpf_insn *insns; size_t insns_cnt; const char *license; - __u32 kern_version; - __u32 prog_ifindex; + union { + __u32 kern_version; + __u32 attach_prog_fd; + }; + union { + __u32 prog_ifindex; + __u32 attach_btf_id; + }; __u32 prog_btf_fd; __u32 func_info_rec_size; const void *func_info; diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h new file mode 100644 index 000000000000..7009dc90e012 --- /dev/null +++ b/tools/lib/bpf/bpf_core_read.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_CORE_READ_H__ +#define __BPF_CORE_READ_H__ + +/* + * enum bpf_field_info_kind is passed as a second argument into + * __builtin_preserve_field_info() built-in to get a specific aspect of + * a field, captured as a first argument. __builtin_preserve_field_info(field, + * info_kind) returns __u32 integer and produces BTF field relocation, which + * is understood and processed by libbpf during BPF object loading. See + * selftests/bpf for examples. + */ +enum bpf_field_info_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, + BPF_FIELD_LSHIFT_U64 = 4, + BPF_FIELD_RSHIFT_U64 = 5, +}; + +#define __CORE_RELO(src, field, info) \ + __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ + bpf_probe_read((void *)dst, \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +#else +/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so + * for big-endian we need to adjust destination pointer accordingly, based on + * field byte size + */ +#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ + bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ + __CORE_RELO(src, fld, BYTE_SIZE), \ + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +#endif + +/* + * Extract bitfield, identified by s->field, and return its value as u64. + * All this is done in relocatable manner, so bitfield changes such as + * signedness, bit size, offset changes, this will be handled automatically. + * This version of macro is using bpf_probe_read() to read underlying integer + * storage. Macro functions as an expression and its return type is + * bpf_probe_read()'s return value: 0, on success, <0 on error. + */ +#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ + unsigned long long val = 0; \ + \ + __CORE_BITFIELD_PROBE_READ(&val, s, field); \ + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ + if (__CORE_RELO(s, field, SIGNED)) \ + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ + else \ + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ + val; \ +}) + +/* + * Extract bitfield, identified by s->field, and return its value as u64. + * This version of macro is using direct memory reads and should be used from + * BPF program types that support such functionality (e.g., typed raw + * tracepoints). + */ +#define BPF_CORE_READ_BITFIELD(s, field) ({ \ + const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned long long val; \ + \ + switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ + case 1: val = *(const unsigned char *)p; \ + case 2: val = *(const unsigned short *)p; \ + case 4: val = *(const unsigned int *)p; \ + case 8: val = *(const unsigned long long *)p; \ + } \ + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ + if (__CORE_RELO(s, field, SIGNED)) \ + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ + else \ + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ + val; \ +}) + +/* + * Convenience macro to check that field actually exists in target kernel's. + * Returns: + * 1, if matching field is present in target kernel; + * 0, if no matching field found. + */ +#define bpf_core_field_exists(field) \ + __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) + +/* + * Convenience macro to get byte size of a field. Works for integers, + * struct/unions, pointers, arrays, and enums. + */ +#define bpf_core_field_size(field) \ + __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) + +/* + * bpf_core_read() abstracts away bpf_probe_read() call and captures offset + * relocation for source address using __builtin_preserve_access_index() + * built-in, provided by Clang. + * + * __builtin_preserve_access_index() takes as an argument an expression of + * taking an address of a field within struct/union. It makes compiler emit + * a relocation, which records BTF type ID describing root struct/union and an + * accessor string which describes exact embedded field that was used to take + * an address. See detailed description of this relocation format and + * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * + * This relocation allows libbpf to adjust BPF instruction to use correct + * actual field offset, based on target kernel BTF type that matches original + * (local) BTF, used to record relocation. + */ +#define bpf_core_read(dst, sz, src) \ + bpf_probe_read(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) + +/* + * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() + * additionally emitting BPF CO-RE field relocation for specified source + * argument. + */ +#define bpf_core_read_str(dst, sz, src) \ + bpf_probe_read_str(dst, sz, \ + (const void *)__builtin_preserve_access_index(src)) + +#define ___concat(a, b) a ## b +#define ___apply(fn, n) ___concat(fn, n) +#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N + +/* + * return number of provided arguments; used for switch-based variadic macro + * definitions (see ___last, ___arrow, etc below) + */ +#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* + * return 0 if no arguments are passed, N - otherwise; used for + * recursively-defined macros to specify termination (0) case, and generic + * (N) case (e.g., ___read_ptrs, ___core_read) + */ +#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___last1(x) x +#define ___last2(a, x) x +#define ___last3(a, b, x) x +#define ___last4(a, b, c, x) x +#define ___last5(a, b, c, d, x) x +#define ___last6(a, b, c, d, e, x) x +#define ___last7(a, b, c, d, e, f, x) x +#define ___last8(a, b, c, d, e, f, g, x) x +#define ___last9(a, b, c, d, e, f, g, h, x) x +#define ___last10(a, b, c, d, e, f, g, h, i, x) x +#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___nolast2(a, _) a +#define ___nolast3(a, b, _) a, b +#define ___nolast4(a, b, c, _) a, b, c +#define ___nolast5(a, b, c, d, _) a, b, c, d +#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e +#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f +#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g +#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h +#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i +#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___arrow1(a) a +#define ___arrow2(a, b) a->b +#define ___arrow3(a, b, c) a->b->c +#define ___arrow4(a, b, c, d) a->b->c->d +#define ___arrow5(a, b, c, d, e) a->b->c->d->e +#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f +#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g +#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h +#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i +#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j +#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___type(...) typeof(___arrow(__VA_ARGS__)) + +#define ___read(read_fn, dst, src_type, src, accessor) \ + read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) + +/* "recursively" read a sequence of inner pointers using local __t var */ +#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); +#define ___rd_last(...) \ + ___read(bpf_core_read, &__t, \ + ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) +#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___read_ptrs(src, ...) \ + ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) + +#define ___core_read0(fn, dst, src, a) \ + ___read(fn, dst, ___type(src), src, a); +#define ___core_readN(fn, dst, src, ...) \ + ___read_ptrs(src, ___nolast(__VA_ARGS__)) \ + ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \ + ___last(__VA_ARGS__)); +#define ___core_read(fn, dst, src, a, ...) \ + ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \ + src, a, ##__VA_ARGS__) + +/* + * BPF_CORE_READ_INTO() is a more performance-conscious variant of + * BPF_CORE_READ(), in which final field is read into user-provided storage. + * See BPF_CORE_READ() below for more details on general usage. + */ +#define BPF_CORE_READ_INTO(dst, src, a, ...) \ + ({ \ + ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ + }) + +/* + * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as + * BPF_CORE_READ() for intermediate pointers, but then executes (and returns + * corresponding error code) bpf_core_read_str() for final string read. + */ +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ + ({ \ + ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ + }) + +/* + * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially + * when there are few pointer chasing steps. + * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: + * int x = s->a.b.c->d.e->f->g; + * can be succinctly achieved using BPF_CORE_READ as: + * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); + * + * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF + * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * 1. const void *__t = s->a.b.c; + * 2. __t = __t->d.e; + * 3. __t = __t->f; + * 4. return __t->g; + * + * Equivalence is logical, because there is a heavy type casting/preservation + * involved, as well as all the reads are happening through bpf_probe_read() + * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * + * N.B. Only up to 9 "field accessors" are supported, which should be more + * than enough for any practical purpose. + */ +#define BPF_CORE_READ(src, a, ...) \ + ({ \ + ___type(src, a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ + __r; \ + }) + +#endif + diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h new file mode 100644 index 000000000000..fbe28008450f --- /dev/null +++ b/tools/lib/bpf/bpf_endian.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_ENDIAN__ +#define __BPF_ENDIAN__ + +#include <linux/stddef.h> +#include <linux/swab.h> + +/* LLVM's BPF target selects the endianness of the CPU + * it compiles on, or the user specifies (bpfel/bpfeb), + * respectively. The used __BYTE_ORDER__ is defined by + * the compiler, we cannot rely on __BYTE_ORDER from + * libc headers, since it doesn't reflect the actual + * requested byte order. + * + * Note, LLVM's BPF target has different __builtin_bswapX() + * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE + * in bpfel and bpfeb case, which means below, that we map + * to cpu_to_be16(). We could use it unconditionally in BPF + * case, but better not rely on it, so that this header here + * can be used from application and BPF program side, which + * use different targets. + */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __bpf_ntohs(x) __builtin_bswap16(x) +# define __bpf_htons(x) __builtin_bswap16(x) +# define __bpf_constant_ntohs(x) ___constant_swab16(x) +# define __bpf_constant_htons(x) ___constant_swab16(x) +# define __bpf_ntohl(x) __builtin_bswap32(x) +# define __bpf_htonl(x) __builtin_bswap32(x) +# define __bpf_constant_ntohl(x) ___constant_swab32(x) +# define __bpf_constant_htonl(x) ___constant_swab32(x) +# define __bpf_be64_to_cpu(x) __builtin_bswap64(x) +# define __bpf_cpu_to_be64(x) __builtin_bswap64(x) +# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x) +# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __bpf_ntohs(x) (x) +# define __bpf_htons(x) (x) +# define __bpf_constant_ntohs(x) (x) +# define __bpf_constant_htons(x) (x) +# define __bpf_ntohl(x) (x) +# define __bpf_htonl(x) (x) +# define __bpf_constant_ntohl(x) (x) +# define __bpf_constant_htonl(x) (x) +# define __bpf_be64_to_cpu(x) (x) +# define __bpf_cpu_to_be64(x) (x) +# define __bpf_constant_be64_to_cpu(x) (x) +# define __bpf_constant_cpu_to_be64(x) (x) +#else +# error "Fix your compiler's __BYTE_ORDER__?!" +#endif + +#define bpf_htons(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_htons(x) : __bpf_htons(x)) +#define bpf_ntohs(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_ntohs(x) : __bpf_ntohs(x)) +#define bpf_htonl(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_htonl(x) : __bpf_htonl(x)) +#define bpf_ntohl(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_ntohl(x) : __bpf_ntohl(x)) +#define bpf_cpu_to_be64(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) +#define bpf_be64_to_cpu(x) \ + (__builtin_constant_p(x) ? \ + __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) + +#endif /* __BPF_ENDIAN__ */ diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h new file mode 100644 index 000000000000..0c7d28292898 --- /dev/null +++ b/tools/lib/bpf/bpf_helpers.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_HELPERS__ +#define __BPF_HELPERS__ + +#include "bpf_helper_defs.h" + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) typeof(val) *name + +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* + * Helper macro to place programs, maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +#ifndef __always_inline +#define __always_inline __attribute__((always_inline)) +#endif + +/* + * Helper structure used by eBPF C program + * to describe BPF map attributes to libbpf loader + */ +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; +}; + +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + +#endif diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 8c67561c93b0..3ed1a27b5f7c 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -101,6 +101,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) { struct bpf_prog_linfo *prog_linfo; __u32 nr_linfo, nr_jited_func; + __u64 data_sz; nr_linfo = info->nr_line_info; @@ -122,11 +123,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) /* Copy xlated line_info */ prog_linfo->nr_linfo = nr_linfo; prog_linfo->rec_size = info->line_info_rec_size; - prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); + data_sz = (__u64)nr_linfo * prog_linfo->rec_size; + prog_linfo->raw_linfo = malloc(data_sz); if (!prog_linfo->raw_linfo) goto err_free; - memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, - nr_linfo * prog_linfo->rec_size); + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz); nr_jited_func = info->nr_jited_ksyms; if (!nr_jited_func || @@ -142,13 +143,12 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) /* Copy jited_line_info */ prog_linfo->nr_jited_func = nr_jited_func; prog_linfo->jited_rec_size = info->jited_line_info_rec_size; - prog_linfo->raw_jited_linfo = malloc(nr_linfo * - prog_linfo->jited_rec_size); + data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size; + prog_linfo->raw_jited_linfo = malloc(data_sz); if (!prog_linfo->raw_jited_linfo) goto err_free; memcpy(prog_linfo->raw_jited_linfo, - (void *)(long)info->jited_line_info, - nr_linfo * prog_linfo->jited_rec_size); + (void *)(long)info->jited_line_info, data_sz); /* Number of jited_line_info per jited func */ prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h new file mode 100644 index 000000000000..b0dafe8b4ebc --- /dev/null +++ b/tools/lib/bpf/bpf_tracing.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_H__ +#define __BPF_TRACING_H__ + +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) + #define bpf_target_x86 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_s390) + #define bpf_target_s390 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm) + #define bpf_target_arm + #define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) + #define bpf_target_arm64 + #define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) + #define bpf_target_mips + #define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) + #define bpf_target_powerpc + #define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) + #define bpf_target_sparc + #define bpf_target_defined +#else + #undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined +#if defined(__x86_64__) + #define bpf_target_x86 +#elif defined(__s390__) + #define bpf_target_s390 +#elif defined(__arm__) + #define bpf_target_arm +#elif defined(__aarch64__) + #define bpf_target_arm64 +#elif defined(__mips__) + #define bpf_target_mips +#elif defined(__powerpc__) + #define bpf_target_powerpc +#elif defined(__sparc__) + #define bpf_target_sparc +#endif +#endif + +#if defined(bpf_target_x86) + +#ifdef __KERNEL__ +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->ip) +#else +#ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) +#else +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) ((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) +#endif +#endif + +#elif defined(bpf_target_s390) + +/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) + +#elif defined(bpf_target_arm) + +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) ((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + +#elif defined(bpf_target_arm64) + +/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) + +#elif defined(bpf_target_mips) + +#define PT_REGS_PARM1(x) ((x)->regs[4]) +#define PT_REGS_PARM2(x) ((x)->regs[5]) +#define PT_REGS_PARM3(x) ((x)->regs[6]) +#define PT_REGS_PARM4(x) ((x)->regs[7]) +#define PT_REGS_PARM5(x) ((x)->regs[8]) +#define PT_REGS_RET(x) ((x)->regs[31]) +#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[1]) +#define PT_REGS_SP(x) ((x)->regs[29]) +#define PT_REGS_IP(x) ((x)->cp0_epc) + +#elif defined(bpf_target_powerpc) + +#define PT_REGS_PARM1(x) ((x)->gpr[3]) +#define PT_REGS_PARM2(x) ((x)->gpr[4]) +#define PT_REGS_PARM3(x) ((x)->gpr[5]) +#define PT_REGS_PARM4(x) ((x)->gpr[6]) +#define PT_REGS_PARM5(x) ((x)->gpr[7]) +#define PT_REGS_RC(x) ((x)->gpr[3]) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->nip) + +#elif defined(bpf_target_sparc) + +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ +#if defined(__arch64__) +#define PT_REGS_IP(x) ((x)->tpc) +#else +#define PT_REGS_IP(x) ((x)->pc) +#endif + +#endif + +#if defined(bpf_target_powerpc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#elif defined(bpf_target_sparc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#else +#define BPF_KPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ + ({ bpf_probe_read(&(ip), sizeof(ip), \ + (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +#endif + +#endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1aa189a9112a..88efa2bb7137 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -269,10 +269,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); } +done: if (size < 0) return -EINVAL; - -done: if (nelems && size > UINT32_MAX / nelems) return -E2BIG; @@ -317,6 +316,28 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return -ENOENT; } +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, + __u32 kind) +{ + __u32 i; + + if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) + return 0; + + for (i = 1; i <= btf->nr_types; i++) { + const struct btf_type *t = btf->types[i]; + const char *name; + + if (btf_kind(t) != kind) + continue; + name = btf__name_by_offset(btf, t->name_off); + if (name && !strcmp(type_name, name)) + return i; + } + + return -ENOENT; +} + void btf__free(struct btf *btf) { if (!btf) @@ -390,14 +411,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) GElf_Ehdr ehdr; if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warning("failed to init libelf for %s\n", path); + pr_warn("failed to init libelf for %s\n", path); return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } fd = open(path, O_RDONLY); if (fd < 0) { err = -errno; - pr_warning("failed to open %s: %s\n", path, strerror(errno)); + pr_warn("failed to open %s: %s\n", path, strerror(errno)); return ERR_PTR(err); } @@ -405,19 +426,19 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) elf = elf_begin(fd, ELF_C_READ, NULL); if (!elf) { - pr_warning("failed to open %s as ELF file\n", path); + pr_warn("failed to open %s as ELF file\n", path); goto done; } if (!gelf_getehdr(elf, &ehdr)) { - pr_warning("failed to get EHDR from %s\n", path); + pr_warn("failed to get EHDR from %s\n", path); goto done; } if (!btf_check_endianness(&ehdr)) { - pr_warning("non-native ELF endianness is not supported\n"); + pr_warn("non-native ELF endianness is not supported\n"); goto done; } if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { - pr_warning("failed to get e_shstrndx from %s\n", path); + pr_warn("failed to get e_shstrndx from %s\n", path); goto done; } @@ -427,29 +448,29 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section(%d) header from %s\n", - idx, path); + pr_warn("failed to get section(%d) header from %s\n", + idx, path); goto done; } name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section(%d) name from %s\n", - idx, path); + pr_warn("failed to get section(%d) name from %s\n", + idx, path); goto done; } if (strcmp(name, BTF_ELF_SEC) == 0) { btf_data = elf_getdata(scn, 0); if (!btf_data) { - pr_warning("failed to get section(%d, %s) data from %s\n", - idx, name, path); + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); goto done; } continue; } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = elf_getdata(scn, 0); if (!btf_ext_data) { - pr_warning("failed to get section(%d, %s) data from %s\n", - idx, name, path); + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); goto done; } continue; @@ -600,9 +621,9 @@ int btf__load(struct btf *btf) log_buf, log_buf_size, false); if (btf->fd < 0) { err = -errno; - pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); + pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); if (*log_buf) - pr_warning("%s\n", log_buf); + pr_warn("%s\n", log_buf); goto done; } @@ -707,8 +728,8 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) { - pr_warning("map:%s length of '____btf_map_%s' is too long\n", - map_name, map_name); + pr_warn("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); return -EINVAL; } @@ -721,14 +742,14 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, container_type = btf__type_by_id(btf, container_id); if (!container_type) { - pr_warning("map:%s cannot find BTF type for container_id:%u\n", - map_name, container_id); + pr_warn("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); return -EINVAL; } if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { - pr_warning("map:%s container_name:%s is an invalid container struct\n", - map_name, container_name); + pr_warn("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); return -EINVAL; } @@ -737,25 +758,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, key_size = btf__resolve_size(btf, key->type); if (key_size < 0) { - pr_warning("map:%s invalid BTF key_type_size\n", map_name); + pr_warn("map:%s invalid BTF key_type_size\n", map_name); return key_size; } if (expected_key_size != key_size) { - pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map_name, (__u32)key_size, expected_key_size); + pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); return -EINVAL; } value_size = btf__resolve_size(btf, value->type); if (value_size < 0) { - pr_warning("map:%s invalid BTF value_type_size\n", map_name); + pr_warn("map:%s invalid BTF value_type_size\n", map_name); return value_size; } if (expected_value_size != value_size) { - pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map_name, (__u32)value_size, expected_value_size); + pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); return -EINVAL; } @@ -888,14 +909,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext) return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext) +static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext) { struct btf_ext_sec_setup_param param = { - .off = btf_ext->hdr->offset_reloc_off, - .len = btf_ext->hdr->offset_reloc_len, - .min_rec_size = sizeof(struct bpf_offset_reloc), - .ext_info = &btf_ext->offset_reloc_info, - .desc = "offset_reloc", + .off = btf_ext->hdr->field_reloc_off, + .len = btf_ext->hdr->field_reloc_len, + .min_rec_size = sizeof(struct bpf_field_reloc), + .ext_info = &btf_ext->field_reloc_info, + .desc = "field_reloc", }; return btf_ext_setup_info(btf_ext, ¶m); @@ -975,9 +996,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) goto done; if (btf_ext->hdr->hdr_len < - offsetofend(struct btf_ext_header, offset_reloc_len)) + offsetofend(struct btf_ext_header, field_reloc_len)) goto done; - err = btf_ext_setup_offset_reloc(btf_ext); + err = btf_ext_setup_field_reloc(btf_ext); if (err) goto done; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 9cb44b4fbf60..d9ac73a02cde 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -60,8 +60,8 @@ struct btf_ext_header { __u32 line_info_len; /* optional part of .BTF.ext header */ - __u32 offset_reloc_off; - __u32 offset_reloc_len; + __u32 field_reloc_off; + __u32 field_reloc_len; }; LIBBPF_API void btf__free(struct btf *btf); @@ -72,6 +72,8 @@ LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, + const char *type_name, __u32 kind); LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index ede55fec3618..cb126d8fcf75 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -428,7 +428,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) /* type loop, but resolvable through fwd declaration */ if (btf_is_composite(t) && through_ptr && t->name_off != 0) return 0; - pr_warning("unsatisfiable type cycle, id:[%u]\n", id); + pr_warn("unsatisfiable type cycle, id:[%u]\n", id); return -ELOOP; } @@ -636,8 +636,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) if (id == cont_id) return; if (t->name_off == 0) { - pr_warning("anonymous struct/union loop, id:[%u]\n", - id); + pr_warn("anonymous struct/union loop, id:[%u]\n", + id); return; } btf_dump_emit_struct_fwd(d, id, t); @@ -782,7 +782,7 @@ static int btf_align_of(const struct btf *btf, __u32 id) return align; } default: - pr_warning("unsupported BTF_KIND:%u\n", btf_kind(t)); + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); return 1; } } @@ -876,7 +876,6 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, __u16 vlen = btf_vlen(t); packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; - align = packed ? 1 : btf_align_of(d->btf, id); btf_dump_printf(d, "%s%s%s {", is_struct ? "struct" : "union", @@ -906,6 +905,13 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, ";"); } + /* pad at the end, if necessary */ + if (is_struct) { + align = packed ? 1 : btf_align_of(d->btf, id); + btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, + lvl + 1); + } + if (vlen) btf_dump_printf(d, "\n"); btf_dump_printf(d, "%s}", pfx(lvl)); @@ -969,6 +975,17 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id, { const char *name = btf_dump_ident_name(d, id); + /* + * Old GCC versions are emitting invalid typedef for __gnuc_va_list + * pointing to VOID. This generates warnings from btf_dump() and + * results in uncompilable header file, so we are fixing it up here + * with valid typedef into __builtin_va_list. + */ + if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) { + btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list"); + return; + } + btf_dump_printf(d, "typedef "); btf_dump_emit_type_decl(d, t->type, name, lvl); } @@ -1050,7 +1067,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warning("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack:%d", err); d->decl_stack_cnt = stack_start; return; } @@ -1079,8 +1096,8 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, case BTF_KIND_TYPEDEF: goto done; default: - pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", - btf_kind(t), id); + pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", + btf_kind(t), id); goto done; } } @@ -1306,8 +1323,8 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, return; } default: - pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", - kind, id); + pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", + kind, id); return; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e0276520171b..b20f82e58989 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@ #include <linux/limits.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> +#include <linux/version.h> #include <sys/epoll.h> #include <sys/ioctl.h> #include <sys/mman.h> @@ -104,7 +105,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) err = action; \ if (err) \ goto out; \ -} while(0) +} while (0) /* Copied from tools/perf/util/util.h */ @@ -141,6 +142,8 @@ struct bpf_capabilities { __u32 btf_func:1; /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ __u32 btf_datasec:1; + /* BPF_F_MMAPABLE is supported for arrays */ + __u32 array_mmap:1; }; /* @@ -187,6 +190,8 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; + __u32 attach_btf_id; + __u32 attach_prog_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@ -225,6 +230,9 @@ struct bpf_map { void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; + char *pin_path; + bool pinned; + bool reused; }; struct bpf_secdata { @@ -248,6 +256,7 @@ struct bpf_object { bool loaded; bool has_pseudo_calls; + bool relaxed_core_relocs; /* * Information when doing elf related work. Only valid if fd @@ -255,7 +264,7 @@ struct bpf_object { */ struct { int fd; - void *obj_buf; + const void *obj_buf; size_t obj_buf_sz; Elf *elf; GElf_Ehdr ehdr; @@ -267,8 +276,8 @@ struct bpf_object { struct { GElf_Shdr shdr; Elf_Data *data; - } *reloc; - int nr_reloc; + } *reloc_sects; + int nr_reloc_sects; int maps_shndx; int btf_maps_shndx; int text_shndx; @@ -310,8 +319,8 @@ void bpf_program__unload(struct bpf_program *prog) for (i = 0; i < prog->instances.nr; i++) zclose(prog->instances.fds[i]); } else if (prog->instances.nr != -1) { - pr_warning("Internal error: instances.nr is %d\n", - prog->instances.nr); + pr_warn("Internal error: instances.nr is %d\n", + prog->instances.nr); } prog->instances.nr = -1; @@ -362,8 +371,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, const size_t bpf_insn_sz = sizeof(struct bpf_insn); if (size == 0 || size % bpf_insn_sz) { - pr_warning("corrupted section '%s', size: %zu\n", - section_name, size); + pr_warn("corrupted section '%s', size: %zu\n", + section_name, size); return -EINVAL; } @@ -371,22 +380,22 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog under section(%d) %s\n", - idx, section_name); + pr_warn("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); goto errout; } prog->pin_name = __bpf_program__pin_name(prog); if (!prog->pin_name) { - pr_warning("failed to alloc pin name for prog under section(%d) %s\n", - idx, section_name); + pr_warn("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); goto errout; } prog->insns = malloc(size); if (!prog->insns) { - pr_warning("failed to alloc insns for prog under section %s\n", - section_name); + pr_warn("failed to alloc insns for prog under section %s\n", + section_name); goto errout; } prog->insns_cnt = size / bpf_insn_sz; @@ -424,8 +433,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, * is still valid, so don't need special treat for * bpf_close_object(). */ - pr_warning("failed to alloc a new program under section '%s'\n", - section_name); + pr_warn("failed to alloc a new program under section '%s'\n", + section_name); bpf_program__exit(&prog); return -ENOMEM; } @@ -465,8 +474,8 @@ bpf_object__init_prog_names(struct bpf_object *obj) obj->efile.strtabidx, sym.st_name); if (!name) { - pr_warning("failed to get sym name string for prog %s\n", - prog->section_name); + pr_warn("failed to get sym name string for prog %s\n", + prog->section_name); return -LIBBPF_ERRNO__LIBELF; } } @@ -475,15 +484,15 @@ bpf_object__init_prog_names(struct bpf_object *obj) name = ".text"; if (!name) { - pr_warning("failed to find sym for prog %s\n", - prog->section_name); + pr_warn("failed to find sym for prog %s\n", + prog->section_name); return -EINVAL; } prog->name = strdup(name); if (!prog->name) { - pr_warning("failed to allocate memory for prog sym %s\n", - name); + pr_warn("failed to allocate memory for prog sym %s\n", + name); return -ENOMEM; } } @@ -491,25 +500,43 @@ bpf_object__init_prog_names(struct bpf_object *obj) return 0; } +static __u32 get_kernel_version(void) +{ + __u32 major, minor, patch; + struct utsname info; + + uname(&info); + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; + return KERNEL_VERSION(major, minor, patch); +} + static struct bpf_object *bpf_object__new(const char *path, - void *obj_buf, - size_t obj_buf_sz) + const void *obj_buf, + size_t obj_buf_sz, + const char *obj_name) { struct bpf_object *obj; char *end; obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); if (!obj) { - pr_warning("alloc memory failed for %s\n", path); + pr_warn("alloc memory failed for %s\n", path); return ERR_PTR(-ENOMEM); } strcpy(obj->path, path); - /* Using basename() GNU version which doesn't modify arg. */ - strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1); - end = strchr(obj->name, '.'); - if (end) - *end = 0; + if (obj_name) { + strncpy(obj->name, obj_name, sizeof(obj->name) - 1); + obj->name[sizeof(obj->name) - 1] = 0; + } else { + /* Using basename() GNU version which doesn't modify arg. */ + strncpy(obj->name, basename((void *)path), + sizeof(obj->name) - 1); + end = strchr(obj->name, '.'); + if (end) + *end = 0; + } obj->efile.fd = -1; /* @@ -526,6 +553,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->kern_version = get_kernel_version(); obj->loaded = false; INIT_LIST_HEAD(&obj->list); @@ -547,8 +575,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj) obj->efile.rodata = NULL; obj->efile.bss = NULL; - zfree(&obj->efile.reloc); - obj->efile.nr_reloc = 0; + zfree(&obj->efile.reloc_sects); + obj->efile.nr_reloc_sects = 0; zclose(obj->efile.fd); obj->efile.obj_buf = NULL; obj->efile.obj_buf_sz = 0; @@ -560,7 +588,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) GElf_Ehdr *ep; if (obj_elf_valid(obj)) { - pr_warning("elf init: internal error\n"); + pr_warn("elf init: internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -569,7 +597,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) * obj_buf should have been validated by * bpf_object__open_buffer(). */ - obj->efile.elf = elf_memory(obj->efile.obj_buf, + obj->efile.elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz); } else { obj->efile.fd = open(obj->path, O_RDONLY); @@ -578,7 +606,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warning("failed to open %s: %s\n", obj->path, cp); + pr_warn("failed to open %s: %s\n", obj->path, cp); return err; } @@ -587,13 +615,13 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (!obj->efile.elf) { - pr_warning("failed to open %s as ELF file\n", obj->path); + pr_warn("failed to open %s as ELF file\n", obj->path); err = -LIBBPF_ERRNO__LIBELF; goto errout; } if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { - pr_warning("failed to get EHDR from %s\n", obj->path); + pr_warn("failed to get EHDR from %s\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -602,7 +630,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) /* Old LLVM set e_machine to EM_NONE */ if (ep->e_type != ET_REL || (ep->e_machine && ep->e_machine != EM_BPF)) { - pr_warning("%s is not an eBPF object file\n", obj->path); + pr_warn("%s is not an eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -624,7 +652,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj) #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warning("endianness mismatch.\n"); + pr_warn("endianness mismatch.\n"); return -LIBBPF_ERRNO__ENDIAN; } @@ -642,7 +670,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) __u32 kver; if (size != sizeof(kver)) { - pr_warning("invalid kver section in %s\n", obj->path); + pr_warn("invalid kver section in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } memcpy(&kver, data, sizeof(kver)); @@ -684,15 +712,15 @@ static int bpf_object_search_section_size(const struct bpf_object *obj, idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section(%d) header from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) header from %s\n", + idx, obj->path); return -EIO; } sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!sec_name) { - pr_warning("failed to get section(%d) name from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) name from %s\n", + idx, obj->path); return -EIO; } @@ -701,8 +729,8 @@ static int bpf_object_search_section_size(const struct bpf_object *obj, data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); + pr_warn("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); return -EIO; } @@ -762,8 +790,8 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); if (!sname) { - pr_warning("failed to get sym name string for var %s\n", - name); + pr_warn("failed to get sym name string for var %s\n", + name); return -EIO; } if (strcmp(name, sname) == 0) { @@ -787,7 +815,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) new_cap = max((size_t)4, obj->maps_cap * 3 / 2); new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); if (!new_maps) { - pr_warning("alloc maps for object failed\n"); + pr_warn("alloc maps for object failed\n"); return ERR_PTR(-ENOMEM); } @@ -828,11 +856,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, libbpf_type_to_btf_name[type]); map->name = strdup(map_name); if (!map->name) { - pr_warning("failed to alloc map name\n"); + pr_warn("failed to alloc map name\n"); return -ENOMEM; } - pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n", - map_name, map->sec_idx, map->sec_offset); def = &map->def; def->type = BPF_MAP_TYPE_ARRAY; @@ -840,11 +866,17 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def->value_size = data->d_size; def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; + if (obj->caps.array_mmap) + def->map_flags |= BPF_F_MMAPABLE; + + pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", + map_name, map->sec_idx, map->sec_offset, def->map_flags); + if (data_buff) { *data_buff = malloc(data->d_size); if (!*data_buff) { zfree(&map->name); - pr_warning("failed to alloc map content buffer\n"); + pr_warn("failed to alloc map content buffer\n"); return -ENOMEM; } memcpy(*data_buff, data->d_buf, data->d_size); @@ -906,8 +938,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (scn) data = elf_getdata(scn, NULL); if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); + pr_warn("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); return -EINVAL; } @@ -932,13 +964,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path, nr_maps, data->d_size); - map_def_sz = data->d_size / nr_maps; - if (!data->d_size || (data->d_size % nr_maps) != 0) { - pr_warning("unable to determine map definition size " - "section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); + if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { + pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); return -EINVAL; } + map_def_sz = data->d_size / nr_maps; /* Fill obj->maps using data in "maps" section. */ for (i = 0; i < nr_syms; i++) { @@ -959,8 +990,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); if (!map_name) { - pr_warning("failed to get map #%d name sym string for obj %s\n", - i, obj->path); + pr_warn("failed to get map #%d name sym string for obj %s\n", + i, obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -970,14 +1001,14 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); if (sym.st_value + map_def_sz > data->d_size) { - pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", - obj->path, map_name); + pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", + obj->path, map_name); return -EINVAL; } map->name = strdup(map_name); if (!map->name) { - pr_warning("failed to alloc map name\n"); + pr_warn("failed to alloc map name\n"); return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); @@ -998,13 +1029,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) * incompatible. */ char *b; + for (b = ((char *)def) + sizeof(struct bpf_map_def); b < ((char *)def) + map_def_sz; b++) { if (*b != 0) { - pr_warning("maps section in %s: \"%s\" " - "has unrecognized, non-zero " - "options\n", - obj->path, map_name); + pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", + obj->path, map_name); if (strict) return -EINVAL; } @@ -1041,27 +1071,28 @@ skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) */ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *def, - const struct btf_member *m, __u32 *res) { + const struct btf_member *m, __u32 *res) +{ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); const char *name = btf__name_by_offset(btf, m->name_off); const struct btf_array *arr_info; const struct btf_type *arr_t; if (!btf_is_ptr(t)) { - pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, btf_kind(t)); + pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", + map_name, name, btf_kind(t)); return false; } arr_t = btf__type_by_id(btf, t->type); if (!arr_t) { - pr_warning("map '%s': attr '%s': type [%u] not found.\n", - map_name, name, t->type); + pr_warn("map '%s': attr '%s': type [%u] not found.\n", + map_name, name, t->type); return false; } if (!btf_is_array(arr_t)) { - pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, btf_kind(arr_t)); + pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", + map_name, name, btf_kind(arr_t)); return false; } arr_info = btf_array(arr_t); @@ -1069,10 +1100,32 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, return true; } +static int build_map_pin_path(struct bpf_map *map, const char *path) +{ + char buf[PATH_MAX]; + int err, len; + + if (!path) + path = "/sys/fs/bpf"; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + err = bpf_map__set_pin_path(map, buf); + if (err) + return err; + + return 0; +} + static int bpf_object__init_user_btf_map(struct bpf_object *obj, const struct btf_type *sec, int var_idx, int sec_idx, - const Elf_Data *data, bool strict) + const Elf_Data *data, bool strict, + const char *pin_root_path) { const struct btf_type *var, *def, *t; const struct btf_var_secinfo *vi; @@ -1089,33 +1142,33 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, vlen = btf_vlen(var); if (map_name == NULL || map_name[0] == '\0') { - pr_warning("map #%d: empty name.\n", var_idx); + pr_warn("map #%d: empty name.\n", var_idx); return -EINVAL; } if ((__u64)vi->offset + vi->size > data->d_size) { - pr_warning("map '%s' BTF data is corrupted.\n", map_name); + pr_warn("map '%s' BTF data is corrupted.\n", map_name); return -EINVAL; } if (!btf_is_var(var)) { - pr_warning("map '%s': unexpected var kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected var kind %u.\n", + map_name, btf_kind(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && var_extra->linkage != BTF_VAR_STATIC) { - pr_warning("map '%s': unsupported var linkage %u.\n", - map_name, var_extra->linkage); + pr_warn("map '%s': unsupported var linkage %u.\n", + map_name, var_extra->linkage); return -EOPNOTSUPP; } def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (!btf_is_struct(def)) { - pr_warning("map '%s': unexpected def kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected def kind %u.\n", + map_name, btf_kind(var)); return -EINVAL; } if (def->size > vi->size) { - pr_warning("map '%s': invalid def size.\n", map_name); + pr_warn("map '%s': invalid def size.\n", map_name); return -EINVAL; } @@ -1124,7 +1177,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return PTR_ERR(map); map->name = strdup(map_name); if (!map->name) { - pr_warning("map '%s': failed to alloc map name.\n", map_name); + pr_warn("map '%s': failed to alloc map name.\n", map_name); return -ENOMEM; } map->libbpf_type = LIBBPF_MAP_UNSPEC; @@ -1140,8 +1193,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, const char *name = btf__name_by_offset(obj->btf, m->name_off); if (!name) { - pr_warning("map '%s': invalid field #%d.\n", - map_name, i); + pr_warn("map '%s': invalid field #%d.\n", map_name, i); return -EINVAL; } if (strcmp(name, "type") == 0) { @@ -1171,8 +1223,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found key_size = %u.\n", map_name, sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warning("map '%s': conflicting key size %u != %u.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %u.\n", + map_name, map->def.key_size, sz); return -EINVAL; } map->def.key_size = sz; @@ -1181,26 +1233,26 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, t = btf__type_by_id(obj->btf, m->type); if (!t) { - pr_warning("map '%s': key type [%d] not found.\n", - map_name, m->type); + pr_warn("map '%s': key type [%d] not found.\n", + map_name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warning("map '%s': key spec is not PTR: %u.\n", - map_name, btf_kind(t)); + pr_warn("map '%s': key spec is not PTR: %u.\n", + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", + map_name, t->type, sz); return sz; } pr_debug("map '%s': found key [%u], sz = %lld.\n", map_name, t->type, sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warning("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %lld.\n", + map_name, map->def.key_size, sz); return -EINVAL; } map->def.key_size = sz; @@ -1214,8 +1266,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found value_size = %u.\n", map_name, sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warning("map '%s': conflicting value size %u != %u.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %u.\n", + map_name, map->def.value_size, sz); return -EINVAL; } map->def.value_size = sz; @@ -1224,34 +1276,58 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, t = btf__type_by_id(obj->btf, m->type); if (!t) { - pr_warning("map '%s': value type [%d] not found.\n", - map_name, m->type); + pr_warn("map '%s': value type [%d] not found.\n", + map_name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warning("map '%s': value spec is not PTR: %u.\n", - map_name, btf_kind(t)); + pr_warn("map '%s': value spec is not PTR: %u.\n", + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", + map_name, t->type, sz); return sz; } pr_debug("map '%s': found value [%u], sz = %lld.\n", map_name, t->type, sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warning("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %lld.\n", + map_name, map->def.value_size, sz); return -EINVAL; } map->def.value_size = sz; map->btf_value_type_id = t->type; + } else if (strcmp(name, "pinning") == 0) { + __u32 val; + int err; + + if (!get_map_field_int(map_name, obj->btf, def, m, + &val)) + return -EINVAL; + pr_debug("map '%s': found pinning = %u.\n", + map_name, val); + + if (val != LIBBPF_PIN_NONE && + val != LIBBPF_PIN_BY_NAME) { + pr_warn("map '%s': invalid pinning value %u.\n", + map_name, val); + return -EINVAL; + } + if (val == LIBBPF_PIN_BY_NAME) { + err = build_map_pin_path(map, pin_root_path); + if (err) { + pr_warn("map '%s': couldn't build pin path.\n", + map_name); + return err; + } + } } else { if (strict) { - pr_warning("map '%s': unknown field '%s'.\n", - map_name, name); + pr_warn("map '%s': unknown field '%s'.\n", + map_name, name); return -ENOTSUP; } pr_debug("map '%s': ignoring unknown field '%s'.\n", @@ -1260,14 +1336,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } if (map->def.type == BPF_MAP_TYPE_UNSPEC) { - pr_warning("map '%s': map type isn't specified.\n", map_name); + pr_warn("map '%s': map type isn't specified.\n", map_name); return -EINVAL; } return 0; } -static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) +static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, + const char *pin_root_path) { const struct btf_type *sec = NULL; int nr_types, i, vlen, err; @@ -1283,8 +1360,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) if (scn) data = elf_getdata(scn, NULL); if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + pr_warn("failed to get Elf_Data from map section %d (%s)\n", + obj->efile.maps_shndx, MAPS_ELF_SEC); return -EINVAL; } @@ -1301,7 +1378,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) } if (!sec) { - pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC); + pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); return -ENOENT; } @@ -1309,7 +1386,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) for (i = 0; i < vlen; i++) { err = bpf_object__init_user_btf_map(obj, sec, i, obj->efile.btf_maps_shndx, - data, strict); + data, strict, + pin_root_path); if (err) return err; } @@ -1317,16 +1395,17 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, int flags) +static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, + const char *pin_root_path) { - bool strict = !(flags & MAPS_RELAX_COMPAT); + bool strict = !relaxed_maps; int err; err = bpf_object__init_user_maps(obj, strict); if (err) return err; - err = bpf_object__init_user_btf_maps(obj, strict); + err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); if (err) return err; @@ -1445,14 +1524,13 @@ static int bpf_object__init_btf(struct bpf_object *obj, if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); if (IS_ERR(obj->btf)) { - pr_warning("Error loading ELF section %s: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %d.\n", + BTF_ELF_SEC, err); goto out; } err = btf__finalize_data(obj, obj->btf); if (err) { - pr_warning("Error finalizing %s: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); goto out; } } @@ -1465,8 +1543,8 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); if (IS_ERR(obj->btf_ext)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); + pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); obj->btf_ext = NULL; goto out; } @@ -1482,7 +1560,7 @@ out: obj->btf = NULL; } if (btf_required && !obj->btf) { - pr_warning("BTF is required, but is missing or corrupted.\n"); + pr_warn("BTF is required, but is missing or corrupted.\n"); return err == 0 ? -ENOENT : err; } return 0; @@ -1500,8 +1578,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) err = btf__load(obj->btf); if (err) { - pr_warning("Error loading %s into kernel: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error loading %s into kernel: %d.\n", + BTF_ELF_SEC, err); btf__free(obj->btf); obj->btf = NULL; /* btf_ext can't exist without btf, so free it as well */ @@ -1516,7 +1594,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, int flags) +static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, + const char *pin_root_path) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1527,7 +1606,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) /* Elf is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { - pr_warning("failed to get e_shstrndx from %s\n", obj->path); + pr_warn("failed to get e_shstrndx from %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -1538,22 +1617,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section(%d) header from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) header from %s\n", + idx, obj->path); return -LIBBPF_ERRNO__FORMAT; } name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section(%d) name from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) name from %s\n", + idx, obj->path); return -LIBBPF_ERRNO__FORMAT; } data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); + pr_warn("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); return -LIBBPF_ERRNO__FORMAT; } pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", @@ -1583,8 +1662,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { - pr_warning("bpf: multiple SYMTAB in %s\n", - obj->path); + pr_warn("bpf: multiple SYMTAB in %s\n", + obj->path); return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; @@ -1594,14 +1673,16 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) if (strcmp(name, ".text") == 0) obj->efile.text_shndx = idx; err = bpf_object__add_program(obj, data->d_buf, - data->d_size, name, idx); + data->d_size, + name, idx); if (err) { char errmsg[STRERR_BUFSIZE]; - char *cp = libbpf_strerror_r(-err, errmsg, - sizeof(errmsg)); + char *cp; - pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, cp); + cp = libbpf_strerror_r(-err, errmsg, + sizeof(errmsg)); + pr_warn("failed to alloc program %s (%s): %s", + name, obj->path, cp); return err; } } else if (strcmp(name, ".data") == 0) { @@ -1614,8 +1695,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_debug("skip section(%d) %s\n", idx, name); } } else if (sh.sh_type == SHT_REL) { - int nr_reloc = obj->efile.nr_reloc; - void *reloc = obj->efile.reloc; + int nr_sects = obj->efile.nr_reloc_sects; + void *sects = obj->efile.reloc_sects; int sec = sh.sh_info; /* points to other section */ /* Only do relo for section with exec instructions */ @@ -1625,18 +1706,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) continue; } - reloc = reallocarray(reloc, nr_reloc + 1, - sizeof(*obj->efile.reloc)); - if (!reloc) { - pr_warning("realloc failed\n"); + sects = reallocarray(sects, nr_sects + 1, + sizeof(*obj->efile.reloc_sects)); + if (!sects) { + pr_warn("reloc_sects realloc failed\n"); return -ENOMEM; } - obj->efile.reloc = reloc; - obj->efile.nr_reloc++; + obj->efile.reloc_sects = sects; + obj->efile.nr_reloc_sects++; - obj->efile.reloc[nr_reloc].shdr = sh; - obj->efile.reloc[nr_reloc].data = data; + obj->efile.reloc_sects[nr_sects].shdr = sh; + obj->efile.reloc_sects[nr_sects].data = data; } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; @@ -1645,13 +1726,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) } } - if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { - pr_warning("Corrupted ELF file: index of strtab invalid\n"); + if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { + pr_warn("Corrupted ELF file: index of strtab invalid\n"); return -LIBBPF_ERRNO__FORMAT; } err = bpf_object__init_btf(obj, btf_data, btf_ext_data); if (!err) - err = bpf_object__init_maps(obj, flags); + err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); if (!err) err = bpf_object__sanitize_and_load_btf(obj); if (!err) @@ -1701,14 +1782,6 @@ static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, shndx == obj->efile.btf_maps_shndx; } -static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, - int shndx) -{ - return shndx == obj->efile.text_shndx || - bpf_object__shndx_is_maps(obj, shndx) || - bpf_object__shndx_is_data(obj, shndx); -} - static enum libbpf_map_type bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) { @@ -1722,130 +1795,162 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) return LIBBPF_MAP_UNSPEC; } +static int bpf_program__record_reloc(struct bpf_program *prog, + struct reloc_desc *reloc_desc, + __u32 insn_idx, const char *name, + const GElf_Sym *sym, const GElf_Rel *rel) +{ + struct bpf_insn *insn = &prog->insns[insn_idx]; + size_t map_idx, nr_maps = prog->obj->nr_maps; + struct bpf_object *obj = prog->obj; + __u32 shdr_idx = sym->st_shndx; + enum libbpf_map_type type; + struct bpf_map *map; + + /* sub-program call relocation */ + if (insn->code == (BPF_JMP | BPF_CALL)) { + if (insn->src_reg != BPF_PSEUDO_CALL) { + pr_warn("incorrect bpf_call opcode\n"); + return -LIBBPF_ERRNO__RELOC; + } + /* text_shndx can be 0, if no default "main" program exists */ + if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { + pr_warn("bad call relo against section %u\n", shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + if (sym->st_value % 8) { + pr_warn("bad call relo offset: %lu\n", sym->st_value); + return -LIBBPF_ERRNO__RELOC; + } + reloc_desc->type = RELO_CALL; + reloc_desc->insn_idx = insn_idx; + reloc_desc->text_off = sym->st_value / 8; + obj->has_pseudo_calls = true; + return 0; + } + + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { + pr_warn("invalid relo for insns[%d].code 0x%x\n", + insn_idx, insn->code); + return -LIBBPF_ERRNO__RELOC; + } + if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { + pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", + name, shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + + /* generic map reference relocation */ + if (type == LIBBPF_MAP_UNSPEC) { + if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { + pr_warn("bad map relo against section %u\n", + shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + map = &obj->maps[map_idx]; + if (map->libbpf_type != type || + map->sec_idx != sym->st_shndx || + map->sec_offset != sym->st_value) + continue; + pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", + map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); + break; + } + if (map_idx >= nr_maps) { + pr_warn("map relo failed to find map for sec %u, off %llu\n", + shdr_idx, (__u64)sym->st_value); + return -LIBBPF_ERRNO__RELOC; + } + reloc_desc->type = RELO_LD64; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = map_idx; + return 0; + } + + /* global data map relocation */ + if (!bpf_object__shndx_is_data(obj, shdr_idx)) { + pr_warn("bad data relo against section %u\n", shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + if (!obj->caps.global_data) { + pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", + name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + map = &obj->maps[map_idx]; + if (map->libbpf_type != type) + continue; + pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", + map_idx, map->name, map->sec_idx, map->sec_offset, + insn_idx); + break; + } + if (map_idx >= nr_maps) { + pr_warn("data relo failed to find map for sec %u\n", + shdr_idx); + return -LIBBPF_ERRNO__RELOC; + } + + reloc_desc->type = RELO_DATA; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = map_idx; + return 0; +} + static int bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, Elf_Data *data, struct bpf_object *obj) { Elf_Data *symbols = obj->efile.symbols; - struct bpf_map *maps = obj->maps; - size_t nr_maps = obj->nr_maps; - int i, nrels; + int err, i, nrels; pr_debug("collecting relocating info for: '%s'\n", prog->section_name); nrels = shdr->sh_size / shdr->sh_entsize; prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); if (!prog->reloc_desc) { - pr_warning("failed to alloc memory in relocation\n"); + pr_warn("failed to alloc memory in relocation\n"); return -ENOMEM; } prog->nr_reloc = nrels; for (i = 0; i < nrels; i++) { - struct bpf_insn *insns = prog->insns; - enum libbpf_map_type type; - unsigned int insn_idx; - unsigned int shdr_idx; const char *name; - size_t map_idx; + __u32 insn_idx; GElf_Sym sym; GElf_Rel rel; if (!gelf_getrel(data, i, &rel)) { - pr_warning("relocation: failed to get %d reloc\n", i); + pr_warn("relocation: failed to get %d reloc\n", i); return -LIBBPF_ERRNO__FORMAT; } - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warning("relocation: symbol %"PRIx64" not found\n", - GELF_R_SYM(rel.r_info)); + pr_warn("relocation: symbol %"PRIx64" not found\n", + GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } + if (rel.r_offset % sizeof(struct bpf_insn)) + return -LIBBPF_ERRNO__FORMAT; + insn_idx = rel.r_offset / sizeof(struct bpf_insn); name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name) ? : "<?>"; - pr_debug("relo for %lld value %lld name %d (\'%s\')\n", - (long long) (rel.r_info >> 32), - (long long) sym.st_value, sym.st_name, name); + pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), + (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + GELF_ST_BIND(sym.st_info), sym.st_name, name, + insn_idx); - shdr_idx = sym.st_shndx; - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n", - insn_idx, shdr_idx); - - if (shdr_idx >= SHN_LORESERVE) { - pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n", - name, shdr_idx, insn_idx, - insns[insn_idx].code); - return -LIBBPF_ERRNO__RELOC; - } - if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { - pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", - prog->section_name, shdr_idx); - return -LIBBPF_ERRNO__RELOC; - } - - if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { - if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { - pr_warning("incorrect bpf_call opcode\n"); - return -LIBBPF_ERRNO__RELOC; - } - prog->reloc_desc[i].type = RELO_CALL; - prog->reloc_desc[i].insn_idx = insn_idx; - prog->reloc_desc[i].text_off = sym.st_value; - obj->has_pseudo_calls = true; - continue; - } - - if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { - pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", - insn_idx, insns[insn_idx].code); - return -LIBBPF_ERRNO__RELOC; - } - - if (bpf_object__shndx_is_maps(obj, shdr_idx) || - bpf_object__shndx_is_data(obj, shdr_idx)) { - type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); - if (type != LIBBPF_MAP_UNSPEC) { - if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { - pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", - name, insn_idx, insns[insn_idx].code); - return -LIBBPF_ERRNO__RELOC; - } - if (!obj->caps.global_data) { - pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); - return -LIBBPF_ERRNO__RELOC; - } - } - - for (map_idx = 0; map_idx < nr_maps; map_idx++) { - if (maps[map_idx].libbpf_type != type) - continue; - if (type != LIBBPF_MAP_UNSPEC || - (maps[map_idx].sec_idx == sym.st_shndx && - maps[map_idx].sec_offset == sym.st_value)) { - pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n", - map_idx, maps[map_idx].name, - maps[map_idx].sec_idx, - maps[map_idx].sec_offset, - insn_idx); - break; - } - } - - if (map_idx >= nr_maps) { - pr_warning("bpf relocation: map_idx %d larger than %d\n", - (int)map_idx, (int)nr_maps - 1); - return -LIBBPF_ERRNO__RELOC; - } - - prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ? - RELO_DATA : RELO_LD64; - prog->reloc_desc[i].insn_idx = insn_idx; - prog->reloc_desc[i].map_idx = map_idx; - } + err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], + insn_idx, name, &sym, &rel); + if (err) + return err; } return 0; } @@ -1897,16 +2002,22 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) return -errno; new_fd = open("/", O_RDONLY | O_CLOEXEC); - if (new_fd < 0) + if (new_fd < 0) { + err = -errno; goto err_free_new_name; + } new_fd = dup3(fd, new_fd, O_CLOEXEC); - if (new_fd < 0) + if (new_fd < 0) { + err = -errno; goto err_close_new_fd; + } err = zclose(map->fd); - if (err) + if (err) { + err = -errno; goto err_close_new_fd; + } free(map->name); map->fd = new_fd; @@ -1918,6 +2029,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) map->def.map_flags = info.map_flags; map->btf_key_type_id = info.btf_key_type_id; map->btf_value_type_id = info.btf_value_type_id; + map->reused = true; return 0; @@ -1925,7 +2037,7 @@ err_close_new_fd: close(new_fd); err_free_new_name: free(new_name); - return -errno; + return err; } int bpf_map__resize(struct bpf_map *map, __u32 max_entries) @@ -1964,8 +2076,8 @@ bpf_object__probe_name(struct bpf_object *obj) ret = bpf_load_program_xattr(&attr, NULL, 0); if (ret < 0) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", - __func__, cp, errno); + pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); return -errno; } close(ret); @@ -2005,8 +2117,8 @@ bpf_object__probe_global_data(struct bpf_object *obj) map = bpf_create_map_xattr(&map_attr); if (map < 0) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, errno); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, errno); return -errno; } @@ -2030,7 +2142,7 @@ bpf_object__probe_global_data(struct bpf_object *obj) static int bpf_object__probe_btf_func(struct bpf_object *obj) { - const char strs[] = "\0int\0x\0a"; + static const char strs[] = "\0int\0x\0a"; /* void x(int a) {} */ __u32 types[] = { /* int */ @@ -2056,7 +2168,7 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj) static int bpf_object__probe_btf_datasec(struct bpf_object *obj) { - const char strs[] = "\0x\0.data"; + static const char strs[] = "\0x\0.data"; /* static int a; */ __u32 types[] = { /* int */ @@ -2081,6 +2193,27 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj) return 0; } +static int bpf_object__probe_array_mmap(struct bpf_object *obj) +{ + struct bpf_create_map_attr attr = { + .map_type = BPF_MAP_TYPE_ARRAY, + .map_flags = BPF_F_MMAPABLE, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, + }; + int fd; + + fd = bpf_create_map_xattr(&attr); + if (fd >= 0) { + obj->caps.array_mmap = 1; + close(fd); + return 1; + } + + return 0; +} + static int bpf_object__probe_caps(struct bpf_object *obj) { @@ -2089,6 +2222,7 @@ bpf_object__probe_caps(struct bpf_object *obj) bpf_object__probe_global_data, bpf_object__probe_btf_func, bpf_object__probe_btf_datasec, + bpf_object__probe_array_mmap, }; int i, ret; @@ -2101,6 +2235,66 @@ bpf_object__probe_caps(struct bpf_object *obj) return 0; } +static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) +{ + struct bpf_map_info map_info = {}; + char msg[STRERR_BUFSIZE]; + __u32 map_info_len; + + map_info_len = sizeof(map_info); + + if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { + pr_warn("failed to get map info for map FD %d: %s\n", + map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); + return false; + } + + return (map_info.type == map->def.type && + map_info.key_size == map->def.key_size && + map_info.value_size == map->def.value_size && + map_info.max_entries == map->def.max_entries && + map_info.map_flags == map->def.map_flags); +} + +static int +bpf_object__reuse_map(struct bpf_map *map) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + int err, pin_fd; + + pin_fd = bpf_obj_get(map->pin_path); + if (pin_fd < 0) { + err = -errno; + if (err == -ENOENT) { + pr_debug("found no pinned map to reuse at '%s'\n", + map->pin_path); + return 0; + } + + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("couldn't retrieve pinned map '%s': %s\n", + map->pin_path, cp); + return err; + } + + if (!map_is_reuse_compat(map, pin_fd)) { + pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", + map->pin_path); + close(pin_fd); + return -EINVAL; + } + + err = bpf_map__reuse_fd(map, pin_fd); + if (err) { + close(pin_fd); + return err; + } + map->pinned = true; + pr_debug("reused pinned map at '%s'\n", map->pin_path); + + return 0; +} + static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { @@ -2121,8 +2315,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("Error freezing map(%s) as read-only: %s\n", + map->name, cp); err = 0; } } @@ -2143,6 +2337,15 @@ bpf_object__create_maps(struct bpf_object *obj) char *cp, errmsg[STRERR_BUFSIZE]; int *pfd = &map->fd; + if (map->pin_path) { + err = bpf_object__reuse_map(map); + if (err) { + pr_warn("error reusing pinned map %s\n", + map->name); + return err; + } + } + if (map->fd >= 0) { pr_debug("skip map create (preset) %s: fd=%d\n", map->name, map->fd); @@ -2161,8 +2364,8 @@ bpf_object__create_maps(struct bpf_object *obj) if (!nr_cpus) nr_cpus = libbpf_num_possible_cpus(); if (nr_cpus < 0) { - pr_warning("failed to determine number of system CPUs: %d\n", - nr_cpus); + pr_warn("failed to determine number of system CPUs: %d\n", + nr_cpus); err = nr_cpus; goto err_out; } @@ -2190,8 +2393,8 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.btf_value_type_id)) { err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", + map->name, cp, err); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -2206,8 +2409,8 @@ bpf_object__create_maps(struct bpf_object *obj) err = -errno; err_out: cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warning("failed to create map (name: '%s'): %s(%d)\n", - map->name, cp, err); + pr_warn("failed to create map (name: '%s'): %s(%d)\n", + map->name, cp, err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -2221,6 +2424,15 @@ err_out: } } + if (map->pin_path && !map->pinned) { + err = bpf_map__pin(map, NULL); + if (err) { + pr_warn("failed to auto-pin map name '%s' at '%s'\n", + map->name, map->pin_path); + return err; + } + } + pr_debug("created map %s: fd=%d\n", map->name, *pfd); } @@ -2232,8 +2444,8 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err, void *btf_prog_info, const char *info_name) { if (err != -ENOENT) { - pr_warning("Error in loading %s for sec %s.\n", - info_name, prog->section_name); + pr_warn("Error in loading %s for sec %s.\n", + info_name, prog->section_name); return err; } @@ -2244,14 +2456,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err, * Some info has already been found but has problem * in the last btf_ext reloc. Must have to error out. */ - pr_warning("Error in relocating %s for sec %s.\n", - info_name, prog->section_name); + pr_warn("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); return err; } /* Have problem loading the very first info. Ignore the rest. */ - pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", - info_name, prog->section_name, info_name); + pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); return 0; } @@ -2315,8 +2527,8 @@ struct bpf_core_spec { int raw_spec[BPF_CORE_SPEC_MAX_LEN]; /* raw spec length */ int raw_len; - /* field byte offset represented by spec */ - __u32 offset; + /* field bit offset represented by spec */ + __u32 bit_offset; }; static bool str_is_empty(const char *s) @@ -2325,10 +2537,10 @@ static bool str_is_empty(const char *s) } /* - * Turn bpf_offset_reloc into a low- and high-level spec representation, + * Turn bpf_field_reloc into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting - * field offset (in bytes), specified by accessor string. Low-level spec - * captures every single level of nestedness, including traversing anonymous + * field bit offset, specified by accessor string. Low-level spec captures + * every single level of nestedness, including traversing anonymous * struct/union members. High-level one only captures semantically meaningful * "turning points": named fields and array indicies. * E.g., for this case: @@ -2400,7 +2612,7 @@ static int bpf_core_spec_parse(const struct btf *btf, sz = btf__resolve_size(btf, id); if (sz < 0) return sz; - spec->offset = access_idx * sz; + spec->bit_offset = access_idx * sz * 8; for (i = 1; i < spec->raw_len; i++) { t = skip_mods_and_typedefs(btf, id, &id); @@ -2411,17 +2623,13 @@ static int bpf_core_spec_parse(const struct btf *btf, if (btf_is_composite(t)) { const struct btf_member *m; - __u32 offset; + __u32 bit_offset; if (access_idx >= btf_vlen(t)) return -EINVAL; - if (btf_member_bitfield_size(t, access_idx)) - return -EINVAL; - offset = btf_member_bit_offset(t, access_idx); - if (offset % 8) - return -EINVAL; - spec->offset += offset / 8; + bit_offset = btf_member_bit_offset(t, access_idx); + spec->bit_offset += bit_offset; m = btf_members(t) + access_idx; if (m->name_off) { @@ -2450,10 +2658,10 @@ static int bpf_core_spec_parse(const struct btf *btf, sz = btf__resolve_size(btf, id); if (sz < 0) return sz; - spec->offset += access_idx * sz; + spec->bit_offset += access_idx * sz * 8; } else { - pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", - type_id, spec_str, i, id, btf_kind(t)); + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); return -EINVAL; } } @@ -2551,12 +2759,14 @@ err_out: } /* Check two types for compatibility, skipping const/volatile/restrict and - * typedefs, to ensure we are relocating offset to the compatible entities: + * typedefs, to ensure we are relocating compatible entities: * - any two STRUCTs/UNIONs are compatible and can be mixed; - * - any two FWDs are compatible; + * - any two FWDs are compatible, if their names match (modulo flavor suffix); * - any two PTRs are always compatible; + * - for ENUMs, names should be the same (ignoring flavor suffix) or at + * least one of enums should be anonymous; * - for ENUMs, check sizes, names are ignored; - * - for INT, size and bitness should match, signedness is ignored; + * - for INT, size and signedness are ignored; * - for ARRAY, dimensionality is ignored, element types are checked for * compatibility recursively; * - everything else shouldn't be ever a target of relocation. @@ -2582,23 +2792,36 @@ recur: return 0; switch (btf_kind(local_type)) { - case BTF_KIND_FWD: case BTF_KIND_PTR: return 1; - case BTF_KIND_ENUM: - return local_type->size == targ_type->size; + case BTF_KIND_FWD: + case BTF_KIND_ENUM: { + const char *local_name, *targ_name; + size_t local_len, targ_len; + + local_name = btf__name_by_offset(local_btf, + local_type->name_off); + targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); + local_len = bpf_core_essential_name_len(local_name); + targ_len = bpf_core_essential_name_len(targ_name); + /* one of them is anonymous or both w/ same flavor-less names */ + return local_len == 0 || targ_len == 0 || + (local_len == targ_len && + strncmp(local_name, targ_name, local_len) == 0); + } case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ return btf_int_offset(local_type) == 0 && - btf_int_offset(targ_type) == 0 && - local_type->size == targ_type->size && - btf_int_bits(local_type) == btf_int_bits(targ_type); + btf_int_offset(targ_type) == 0; case BTF_KIND_ARRAY: local_id = btf_array(local_type)->type; targ_id = btf_array(targ_type)->type; goto recur; default: - pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); return 0; } } @@ -2607,7 +2830,7 @@ recur: * Given single high-level named field accessor in local type, find * corresponding high-level accessor for a target type. Along the way, * maintain low-level spec for target as well. Also keep updating target - * offset. + * bit offset. * * Searching is performed through recursive exhaustive enumeration of all * fields of a struct/union. If there are any anonymous (embedded) @@ -2646,21 +2869,16 @@ static int bpf_core_match_member(const struct btf *local_btf, n = btf_vlen(targ_type); m = btf_members(targ_type); for (i = 0; i < n; i++, m++) { - __u32 offset; + __u32 bit_offset; - /* bitfield relocations not supported */ - if (btf_member_bitfield_size(targ_type, i)) - continue; - offset = btf_member_bit_offset(targ_type, i); - if (offset % 8) - continue; + bit_offset = btf_member_bit_offset(targ_type, i); /* too deep struct/union/array nesting */ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) return -E2BIG; /* speculate this member will be the good one */ - spec->offset += offset / 8; + spec->bit_offset += bit_offset; spec->raw_spec[spec->raw_len++] = i; targ_name = btf__name_by_offset(targ_btf, m->name_off); @@ -2689,7 +2907,7 @@ static int bpf_core_match_member(const struct btf *local_btf, return found; } /* member turned out not to be what we looked for */ - spec->offset -= offset / 8; + spec->bit_offset -= bit_offset; spec->raw_len--; } @@ -2698,7 +2916,7 @@ static int bpf_core_match_member(const struct btf *local_btf, /* * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + offset). + * target spec (high-level, low-level + bit offset). */ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, const struct btf *targ_btf, __u32 targ_id, @@ -2761,35 +2979,165 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, sz = btf__resolve_size(targ_btf, targ_id); if (sz < 0) return sz; - targ_spec->offset += local_acc->idx * sz; + targ_spec->bit_offset += local_acc->idx * sz * 8; } } return 1; } +static int bpf_core_calc_field_relo(const struct bpf_program *prog, + const struct bpf_field_reloc *relo, + const struct bpf_core_spec *spec, + __u32 *val, bool *validate) +{ + const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; + const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); + __u32 byte_off, byte_sz, bit_off, bit_sz; + const struct btf_member *m; + const struct btf_type *mt; + bool bitfield; + __s64 sz; + + /* a[n] accessor needs special handling */ + if (!acc->name) { + if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + *val = spec->bit_offset / 8; + } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + } else { + pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", + bpf_program__title(prog, false), + relo->kind, relo->insn_off / 8); + return -EINVAL; + } + if (validate) + *validate = true; + return 0; + } + + m = btf_members(t) + acc->idx; + mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); + bit_off = spec->bit_offset; + bit_sz = btf_member_bitfield_size(t, acc->idx); + + bitfield = bit_sz > 0; + if (bitfield) { + byte_sz = mt->size; + byte_off = bit_off / 8 / byte_sz * byte_sz; + /* figure out smallest int size necessary for bitfield load */ + while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { + if (byte_sz >= 8) { + /* bitfield can't be read with 64-bit read */ + pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", + bpf_program__title(prog, false), + relo->kind, relo->insn_off / 8); + return -E2BIG; + } + byte_sz *= 2; + byte_off = bit_off / 8 / byte_sz * byte_sz; + } + } else { + sz = btf__resolve_size(spec->btf, m->type); + if (sz < 0) + return -EINVAL; + byte_sz = sz; + byte_off = spec->bit_offset / 8; + bit_sz = byte_sz * 8; + } + + /* for bitfields, all the relocatable aspects are ambiguous and we + * might disagree with compiler, so turn off validation of expected + * value, except for signedness + */ + if (validate) + *validate = !bitfield; + + switch (relo->kind) { + case BPF_FIELD_BYTE_OFFSET: + *val = byte_off; + break; + case BPF_FIELD_BYTE_SIZE: + *val = byte_sz; + break; + case BPF_FIELD_SIGNED: + /* enums will be assumed unsigned */ + *val = btf_is_enum(mt) || + (btf_int_encoding(mt) & BTF_INT_SIGNED); + if (validate) + *validate = true; /* signedness is never ambiguous */ + break; + case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN + *val = 64 - (bit_off + bit_sz - byte_off * 8); +#else + *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); +#endif + break; + case BPF_FIELD_RSHIFT_U64: + *val = 64 - bit_sz; + if (validate) + *validate = true; /* right shift is never ambiguous */ + break; + case BPF_FIELD_EXISTS: + default: + pr_warn("prog '%s': unknown relo %d at insn #%d\n", + bpf_program__title(prog, false), + relo->kind, relo->insn_off / 8); + return -EINVAL; + } + + return 0; +} + /* * Patch relocatable BPF instruction. - * Expected insn->imm value is provided for validation, as well as the new - * relocated value. + * + * Patched value is determined by relocation kind and target specification. + * For field existence relocation target spec will be NULL if field is not + * found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. If actual insn->imm value + * is wrong, bail out with error. * * Currently three kinds of BPF instructions are supported: * 1. rX = <imm> (assignment with immediate operand); * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. *(rX) = <imm> (indirect memory assignment with immediate operand). - * - * If actual insn->imm value is wrong, bail out. */ -static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, - __u32 orig_off, __u32 new_off) +static int bpf_core_reloc_insn(struct bpf_program *prog, + const struct bpf_field_reloc *relo, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec) { + bool failed = false, validate = true; + __u32 orig_val, new_val; struct bpf_insn *insn; - int insn_idx; + int insn_idx, err; __u8 class; - if (insn_off % sizeof(struct bpf_insn)) + if (relo->insn_off % sizeof(struct bpf_insn)) return -EINVAL; - insn_idx = insn_off / sizeof(struct bpf_insn); + insn_idx = relo->insn_off / sizeof(struct bpf_insn); + + if (relo->kind == BPF_FIELD_EXISTS) { + orig_val = 1; /* can't generate EXISTS relo w/o local field */ + new_val = targ_spec ? 1 : 0; + } else if (!targ_spec) { + failed = true; + new_val = (__u32)-1; + } else { + err = bpf_core_calc_field_relo(prog, relo, local_spec, + &orig_val, &validate); + if (err) + return err; + err = bpf_core_calc_field_relo(prog, relo, targ_spec, + &new_val, NULL); + if (err) + return err; + } insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); @@ -2797,19 +3145,25 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, if (class == BPF_ALU || class == BPF_ALU64) { if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (insn->imm != orig_off) + if (!failed && validate && insn->imm != orig_val) { + pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + insn->imm, orig_val, new_val); return -EINVAL; - insn->imm = new_off; - pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", - bpf_program__title(prog, false), - insn_idx, orig_off, new_off); + } + orig_val = insn->imm; + insn->imm = new_val; + pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + failed ? " w/ failed reloc" : "", orig_val, new_val); } else { - pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), - insn_idx, insn->code, insn->src_reg, insn->dst_reg, - insn->off, insn->imm); + pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); return -EINVAL; } + return 0; } @@ -2895,7 +3249,7 @@ static struct btf *bpf_core_find_kernel_btf(void) return btf; } - pr_warning("failed to find valid kernel BTF\n"); + pr_warn("failed to find valid kernel BTF\n"); return ERR_PTR(-ESRCH); } @@ -2919,7 +3273,8 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) libbpf_print(level, "%d%s", spec->raw_spec[i], i == spec->raw_len - 1 ? " => " : ":"); - libbpf_print(level, "%u @ &x", spec->offset); + libbpf_print(level, "%u.%u @ &x", + spec->bit_offset / 8, spec->bit_offset % 8); for (i = 0; i < spec->len; i++) { if (spec->spec[i].name) @@ -2976,7 +3331,7 @@ static void *u32_as_hash_key(__u32 x) * types should be compatible (see bpf_core_fields_are_compat for details). * 3. It is supported and expected that there might be multiple flavors * matching the spec. As long as all the specs resolve to the same set of - * offsets across all candidates, there is not error. If there is any + * offsets across all candidates, there is no error. If there is any * ambiguity, CO-RE relocation will fail. This is necessary to accomodate * imprefection of BTF deduplication, which can cause slight duplication of * the same BTF type, if some directly or indirectly referenced (by @@ -2991,12 +3346,12 @@ static void *u32_as_hash_key(__u32 x) * CPU-wise compared to prebuilding a map from all local type names to * a list of candidate type names. It's also sped up by caching resolved * list of matching candidates per each local "root" type ID, that has at - * least one bpf_offset_reloc associated with it. This list is shared + * least one bpf_field_reloc associated with it. This list is shared * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ -static int bpf_core_reloc_offset(struct bpf_program *prog, - const struct bpf_offset_reloc *relo, +static int bpf_core_reloc_field(struct bpf_program *prog, + const struct bpf_field_reloc *relo, int relo_idx, const struct btf *local_btf, const struct btf *targ_btf, @@ -3027,22 +3382,23 @@ static int bpf_core_reloc_offset(struct bpf_program *prog, err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); if (err) { - pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", - prog_name, relo_idx, local_id, local_name, spec_str, - err); + pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); return -EINVAL; } - pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx); + pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx, + relo->kind); bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); libbpf_print(LIBBPF_DEBUG, "\n"); if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { - pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", - prog_name, relo_idx, local_id, local_name, - PTR_ERR(cand_ids)); + pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); @@ -3064,8 +3420,8 @@ static int bpf_core_reloc_offset(struct bpf_program *prog, bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); libbpf_print(LIBBPF_DEBUG, ": %d\n", err); if (err < 0) { - pr_warning("prog '%s': relo #%d: matching error: %d\n", - prog_name, relo_idx, err); + pr_warn("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); return err; } if (err == 0) @@ -3073,31 +3429,42 @@ static int bpf_core_reloc_offset(struct bpf_program *prog, if (j == 0) { targ_spec = cand_spec; - } else if (cand_spec.offset != targ_spec.offset) { + } else if (cand_spec.bit_offset != targ_spec.bit_offset) { /* if there are many candidates, they should all - * resolve to the same offset + * resolve to the same bit offset */ - pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec.offset, - targ_spec.offset); + pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.bit_offset, + targ_spec.bit_offset); return -EINVAL; } cand_ids->data[j++] = cand_spec.spec[0].type_id; } - cand_ids->len = j; - if (cand_ids->len == 0) { - pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); + /* + * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is + * requested, it's expected that we might not find any candidates. + * In this case, if field wasn't found in any candidate, the list of + * candidates shouldn't change at all, we'll just handle relocating + * appropriately, depending on relo's kind. + */ + if (j > 0) + cand_ids->len = j; + + if (j == 0 && !prog->obj->relaxed_core_relocs && + relo->kind != BPF_FIELD_EXISTS) { + pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); return -ESRCH; } - err = bpf_core_reloc_insn(prog, relo->insn_off, - local_spec.offset, targ_spec.offset); + /* bpf_core_reloc_insn should know how to handle missing targ_spec */ + err = bpf_core_reloc_insn(prog, relo, &local_spec, + j ? &targ_spec : NULL); if (err) { - pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", - prog_name, relo_idx, relo->insn_off, err); + pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); return -EINVAL; } @@ -3105,10 +3472,10 @@ static int bpf_core_reloc_offset(struct bpf_program *prog, } static int -bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) +bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; - const struct bpf_offset_reloc *rec; + const struct bpf_field_reloc *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; @@ -3122,8 +3489,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) else targ_btf = bpf_core_find_kernel_btf(); if (IS_ERR(targ_btf)) { - pr_warning("failed to get target BTF: %ld\n", - PTR_ERR(targ_btf)); + pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); } @@ -3133,7 +3499,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) goto out; } - seg = &obj->btf_ext->offset_reloc_info; + seg = &obj->btf_ext->field_reloc_info; for_each_btf_ext_sec(seg, sec) { sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { @@ -3142,8 +3508,8 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) } prog = bpf_object__find_program_by_title(obj, sec_name); if (!prog) { - pr_warning("failed to find program '%s' for CO-RE offset relocation\n", - sec_name); + pr_warn("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); err = -EINVAL; goto out; } @@ -3152,11 +3518,11 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { - err = bpf_core_reloc_offset(prog, rec, i, obj->btf, - targ_btf, cand_cache); + err = bpf_core_reloc_field(prog, rec, i, obj->btf, + targ_btf, cand_cache); if (err) { - pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", - sec_name, i, err); + pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); goto out; } } @@ -3178,8 +3544,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { int err = 0; - if (obj->btf_ext->offset_reloc_info.len) - err = bpf_core_reloc_offsets(obj, targ_btf_path); + if (obj->btf_ext->field_reloc_info.len) + err = bpf_core_reloc_fields(obj, targ_btf_path); return err; } @@ -3197,23 +3563,24 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -LIBBPF_ERRNO__RELOC; if (prog->idx == obj->efile.text_shndx) { - pr_warning("relo in .text insn %d into off %d\n", - relo->insn_idx, relo->text_off); + pr_warn("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); return -LIBBPF_ERRNO__RELOC; } if (prog->main_prog_cnt == 0) { text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); if (!text) { - pr_warning("no .text section found yet relo into text exist\n"); + pr_warn("no .text section found yet relo into text exist\n"); return -LIBBPF_ERRNO__RELOC; } new_cnt = prog->insns_cnt + text->insns_cnt; new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); if (!new_insn) { - pr_warning("oom in prog realloc\n"); + pr_warn("oom in prog realloc\n"); return -ENOMEM; } + prog->insns = new_insn; if (obj->btf_ext) { err = bpf_program_reloc_btf_ext(prog, obj, @@ -3225,7 +3592,6 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, memcpy(new_insn + prog->insns_cnt, text->insns, text->insns_cnt * sizeof(*insn)); - prog->insns = new_insn; prog->main_prog_cnt = prog->insns_cnt; prog->insns_cnt = new_cnt; pr_debug("added %zd insn from %s to prog %s\n", @@ -3233,7 +3599,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, prog->section_name); } insn = &prog->insns[relo->insn_idx]; - insn->imm += prog->main_prog_cnt - relo->insn_idx; + insn->imm += relo->text_off + prog->main_prog_cnt - relo->insn_idx; return 0; } @@ -3266,8 +3632,8 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) map_idx = prog->reloc_desc[i].map_idx; if (insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warning("relocation out of range: '%s'\n", - prog->section_name); + pr_warn("relocation out of range: '%s'\n", + prog->section_name); return -LIBBPF_ERRNO__RELOC; } @@ -3301,8 +3667,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warning("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %d\n", + err); return err; } } @@ -3311,8 +3677,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err = bpf_program__relocate(prog, obj); if (err) { - pr_warning("failed to relocate '%s'\n", - prog->section_name); + pr_warn("failed to relocate '%s'\n", prog->section_name); return err; } } @@ -3324,24 +3689,24 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) int i, err; if (!obj_elf_valid(obj)) { - pr_warning("Internal error: elf object is closed\n"); + pr_warn("Internal error: elf object is closed\n"); return -LIBBPF_ERRNO__INTERNAL; } - for (i = 0; i < obj->efile.nr_reloc; i++) { - GElf_Shdr *shdr = &obj->efile.reloc[i].shdr; - Elf_Data *data = obj->efile.reloc[i].data; + for (i = 0; i < obj->efile.nr_reloc_sects; i++) { + GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; + Elf_Data *data = obj->efile.reloc_sects[i].data; int idx = shdr->sh_info; struct bpf_program *prog; if (shdr->sh_type != SHT_REL) { - pr_warning("internal error at %d\n", __LINE__); + pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { - pr_warning("relocation failed: no section(%d)\n", idx); + pr_warn("relocation failed: no section(%d)\n", idx); return -LIBBPF_ERRNO__RELOC; } @@ -3373,8 +3738,13 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; - load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog->prog_ifindex; + if (prog->type == BPF_PROG_TYPE_TRACING) { + load_attr.attach_prog_fd = prog->attach_prog_fd; + load_attr.attach_btf_id = prog->attach_btf_id; + } else { + load_attr.kern_version = kern_version; + load_attr.prog_ifindex = prog->prog_ifindex; + } /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ if (prog->obj->btf_ext) btf_fd = bpf_object__btf_fd(prog->obj); @@ -3393,7 +3763,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, retry_load: log_buf = malloc(log_buf_size); if (!log_buf) - pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); + pr_warn("Alloc log buffer for bpf loader error, continue without log\n"); ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); @@ -3410,36 +3780,31 @@ retry_load: free(log_buf); goto retry_load; } - ret = -LIBBPF_ERRNO__LOAD; + ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("load bpf program failed: %s\n", cp); + pr_warn("load bpf program failed: %s\n", cp); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; - pr_warning("-- BEGIN DUMP LOG ---\n"); - pr_warning("\n%s\n", log_buf); - pr_warning("-- END LOG --\n"); + pr_warn("-- BEGIN DUMP LOG ---\n"); + pr_warn("\n%s\n", log_buf); + pr_warn("-- END LOG --\n"); } else if (load_attr.insns_cnt >= BPF_MAXINSNS) { - pr_warning("Program too large (%zu insns), at most %d insns\n", - load_attr.insns_cnt, BPF_MAXINSNS); + pr_warn("Program too large (%zu insns), at most %d insns\n", + load_attr.insns_cnt, BPF_MAXINSNS); ret = -LIBBPF_ERRNO__PROG2BIG; - } else { + } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { /* Wrong program type? */ - if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { - int fd; - - load_attr.prog_type = BPF_PROG_TYPE_KPROBE; - load_attr.expected_attach_type = 0; - fd = bpf_load_program_xattr(&load_attr, NULL, 0); - if (fd >= 0) { - close(fd); - ret = -LIBBPF_ERRNO__PROGTYPE; - goto out; - } - } + int fd; - if (log_buf) - ret = -LIBBPF_ERRNO__KVER; + load_attr.prog_type = BPF_PROG_TYPE_KPROBE; + load_attr.expected_attach_type = 0; + fd = bpf_load_program_xattr(&load_attr, NULL, 0); + if (fd >= 0) { + close(fd); + ret = -LIBBPF_ERRNO__PROGTYPE; + goto out; + } } out: @@ -3455,14 +3820,14 @@ bpf_program__load(struct bpf_program *prog, if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { - pr_warning("Internal error: can't load program '%s'\n", - prog->section_name); + pr_warn("Internal error: can't load program '%s'\n", + prog->section_name); return -LIBBPF_ERRNO__INTERNAL; } prog->instances.fds = malloc(sizeof(int)); if (!prog->instances.fds) { - pr_warning("Not enough memory for BPF fds\n"); + pr_warn("Not enough memory for BPF fds\n"); return -ENOMEM; } prog->instances.nr = 1; @@ -3471,8 +3836,8 @@ bpf_program__load(struct bpf_program *prog, if (!prog->preprocessor) { if (prog->instances.nr != 1) { - pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", - prog->section_name, prog->instances.nr); + pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", + prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_version, &fd); @@ -3489,8 +3854,8 @@ bpf_program__load(struct bpf_program *prog, err = preprocessor(prog, i, prog->insns, prog->insns_cnt, &result); if (err) { - pr_warning("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->section_name); + pr_warn("Preprocessing the %dth instance of program '%s' failed\n", + i, prog->section_name); goto out; } @@ -3508,8 +3873,8 @@ bpf_program__load(struct bpf_program *prog, license, kern_version, &fd); if (err) { - pr_warning("Loading the %dth instance of program '%s' failed\n", - i, prog->section_name); + pr_warn("Loading the %dth instance of program '%s' failed\n", + i, prog->section_name); goto out; } @@ -3519,8 +3884,7 @@ bpf_program__load(struct bpf_program *prog, } out: if (err) - pr_warning("failed to load program '%s'\n", - prog->section_name); + pr_warn("failed to load program '%s'\n", prog->section_name); zfree(&prog->insns); prog->insns_cnt = 0; return err; @@ -3551,93 +3915,104 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) -{ - switch (type) { - case BPF_PROG_TYPE_SOCKET_FILTER: - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_SCHED_ACT: - case BPF_PROG_TYPE_XDP: - case BPF_PROG_TYPE_CGROUP_SKB: - case BPF_PROG_TYPE_CGROUP_SOCK: - case BPF_PROG_TYPE_LWT_IN: - case BPF_PROG_TYPE_LWT_OUT: - case BPF_PROG_TYPE_LWT_XMIT: - case BPF_PROG_TYPE_LWT_SEG6LOCAL: - case BPF_PROG_TYPE_SOCK_OPS: - case BPF_PROG_TYPE_SK_SKB: - case BPF_PROG_TYPE_CGROUP_DEVICE: - case BPF_PROG_TYPE_SK_MSG: - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - case BPF_PROG_TYPE_LIRC_MODE2: - case BPF_PROG_TYPE_SK_REUSEPORT: - case BPF_PROG_TYPE_FLOW_DISSECTOR: - case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_RAW_TRACEPOINT: - case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: - case BPF_PROG_TYPE_PERF_EVENT: - case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_CGROUP_SOCKOPT: - return false; - case BPF_PROG_TYPE_KPROBE: - default: - return true; - } -} - -static int bpf_object__validate(struct bpf_object *obj, bool needs_kver) -{ - if (needs_kver && obj->kern_version == 0) { - pr_warning("%s doesn't provide kernel version\n", - obj->path); - return -LIBBPF_ERRNO__KVERSION; - } - return 0; -} - +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd); static struct bpf_object * -__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, - bool needs_kver, int flags) +__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts) { + const char *pin_root_path; + struct bpf_program *prog; struct bpf_object *obj; + const char *obj_name; + char tmp_name[64]; + bool relaxed_maps; + __u32 attach_prog_fd; int err; if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warning("failed to init libelf for %s\n", path); + pr_warn("failed to init libelf for %s\n", + path ? : "(mem buf)"); return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } - obj = bpf_object__new(path, obj_buf, obj_buf_sz); + if (!OPTS_VALID(opts, bpf_object_open_opts)) + return ERR_PTR(-EINVAL); + + obj_name = OPTS_GET(opts, object_name, NULL); + if (obj_buf) { + if (!obj_name) { + snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", + (unsigned long)obj_buf, + (unsigned long)obj_buf_sz); + obj_name = tmp_name; + } + path = obj_name; + pr_debug("loading object '%s' from buffer\n", obj_name); + } + + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; + obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); + relaxed_maps = OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); + attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); + CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); + CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), + err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); - CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); - bpf_object__elf_finish(obj); + + bpf_object__for_each_program(prog, obj) { + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + + err = libbpf_prog_type_by_name(prog->section_name, &prog_type, + &attach_type); + if (err == -ESRCH) + /* couldn't guess, but user might manually specify */ + continue; + if (err) + goto out; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + if (prog_type == BPF_PROG_TYPE_TRACING) { + err = libbpf_find_attach_btf_id(prog->section_name, + attach_type, + attach_prog_fd); + if (err <= 0) + goto out; + prog->attach_btf_id = err; + prog->attach_prog_fd = attach_prog_fd; + } + } + return obj; out: bpf_object__close(obj); return ERR_PTR(err); } -struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, - int flags) +static struct bpf_object * +__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) { + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .relaxed_maps = flags & MAPS_RELAX_COMPAT, + ); + /* param validation */ if (!attr->file) return NULL; pr_debug("loading %s\n", attr->file); - - return __bpf_object__open(attr->file, NULL, 0, - bpf_prog_type__needs_kver(attr->prog_type), - flags); + return __bpf_object__open(attr->file, NULL, 0, &opts); } struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -3655,25 +4030,42 @@ struct bpf_object *bpf_object__open(const char *path) return bpf_object__open_xattr(&attr); } -struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz, - const char *name) +struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) { - char tmp_name[64]; + if (!path) + return ERR_PTR(-EINVAL); - /* param validation */ - if (!obj_buf || obj_buf_sz <= 0) - return NULL; + pr_debug("loading %s\n", path); - if (!name) { - snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", - (unsigned long)obj_buf, - (unsigned long)obj_buf_sz); - name = tmp_name; - } - pr_debug("loading object '%s' from buffer\n", name); + return __bpf_object__open(path, NULL, 0, opts); +} + +struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts) +{ + if (!obj_buf || obj_buf_sz == 0) + return ERR_PTR(-EINVAL); - return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true); + return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts); +} + +struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, + const char *name) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .object_name = name, + /* wrong default, but backwards-compatible */ + .relaxed_maps = true, + ); + + /* returning NULL is wrong, but backwards-compatible */ + if (!obj_buf || obj_buf_sz == 0) + return NULL; + + return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); } int bpf_object__unload(struct bpf_object *obj) @@ -3695,7 +4087,7 @@ int bpf_object__unload(struct bpf_object *obj) int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { struct bpf_object *obj; - int err; + int err, i; if (!attr) return -EINVAL; @@ -3704,7 +4096,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return -EINVAL; if (obj->loaded) { - pr_warning("object should not be loaded twice\n"); + pr_warn("object should not be loaded twice\n"); return -EINVAL; } @@ -3716,8 +4108,13 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return 0; out: + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); + bpf_object__unload(obj); - pr_warning("failed to load object '%s'\n", obj->path); + pr_warn("failed to load object '%s'\n", obj->path); return err; } @@ -3730,6 +4127,28 @@ int bpf_object__load(struct bpf_object *obj) return bpf_object__load_xattr(&attr); } +static int make_parent_dir(const char *path) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + char *dname, *dir; + int err = 0; + + dname = strdup(path); + if (dname == NULL) + return -ENOMEM; + + dir = dirname(dname); + if (mkdir(dir, 0700) && errno != EEXIST) + err = -errno; + + free(dname); + if (err) { + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("failed to mkdir %s: %s\n", path, cp); + } + return err; +} + static int check_path(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -3747,13 +4166,13 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, cp); err = -errno; } free(dname); if (!err && st_fs.f_type != BPF_FS_MAGIC) { - pr_warning("specified path %s is not on BPF FS\n", path); + pr_warn("specified path %s is not on BPF FS\n", path); err = -EINVAL; } @@ -3766,24 +4185,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, char *cp, errmsg[STRERR_BUFSIZE]; int err; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); if (err) return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (instance < 0 || instance >= prog->instances.nr) { - pr_warning("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); + pr_warn("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); return -EINVAL; } if (bpf_obj_pin(prog->instances.fds[instance], path)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("failed to pin program: %s\n", cp); + pr_warn("failed to pin program: %s\n", cp); return -errno; } pr_debug("pinned program '%s'\n", path); @@ -3801,13 +4224,13 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (instance < 0 || instance >= prog->instances.nr) { - pr_warning("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); + pr_warn("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); return -EINVAL; } @@ -3819,36 +4242,25 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return 0; } -static int make_dir(const char *path) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - int err = 0; - - if (mkdir(path, 0700) && errno != EEXIST) - err = -errno; - - if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warning("failed to mkdir %s: %s\n", path, cp); - } - return err; -} - int bpf_program__pin(struct bpf_program *prog, const char *path) { int i, err; + err = make_parent_dir(path); + if (err) + return err; + err = check_path(path); if (err) return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (prog->instances.nr <= 0) { - pr_warning("no instances of prog %s to pin\n", + pr_warn("no instances of prog %s to pin\n", prog->section_name); return -EINVAL; } @@ -3858,10 +4270,6 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return bpf_program__pin_instance(prog, path, 0); } - err = make_dir(path); - if (err) - return err; - for (i = 0; i < prog->instances.nr; i++) { char buf[PATH_MAX]; int len; @@ -3910,12 +4318,12 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (prog->instances.nr <= 0) { - pr_warning("no instances of prog %s to pin\n", + pr_warn("no instances of prog %s to pin\n", prog->section_name); return -EINVAL; } @@ -3952,47 +4360,123 @@ int bpf_map__pin(struct bpf_map *map, const char *path) char *cp, errmsg[STRERR_BUFSIZE]; int err; - err = check_path(path); - if (err) - return err; - if (map == NULL) { - pr_warning("invalid map pointer\n"); + pr_warn("invalid map pointer\n"); return -EINVAL; } - if (bpf_obj_pin(map->fd, path)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("failed to pin map: %s\n", cp); - return -errno; + if (map->pin_path) { + if (path && strcmp(path, map->pin_path)) { + pr_warn("map '%s' already has pin path '%s' different from '%s'\n", + bpf_map__name(map), map->pin_path, path); + return -EINVAL; + } else if (map->pinned) { + pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", + bpf_map__name(map), map->pin_path); + return 0; + } + } else { + if (!path) { + pr_warn("missing a path to pin map '%s' at\n", + bpf_map__name(map)); + return -EINVAL; + } else if (map->pinned) { + pr_warn("map '%s' already pinned\n", bpf_map__name(map)); + return -EEXIST; + } + + map->pin_path = strdup(path); + if (!map->pin_path) { + err = -errno; + goto out_err; + } } - pr_debug("pinned map '%s'\n", path); + err = make_parent_dir(map->pin_path); + if (err) + return err; + + err = check_path(map->pin_path); + if (err) + return err; + + if (bpf_obj_pin(map->fd, map->pin_path)) { + err = -errno; + goto out_err; + } + + map->pinned = true; + pr_debug("pinned map '%s'\n", map->pin_path); return 0; + +out_err: + cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); + pr_warn("failed to pin map: %s\n", cp); + return err; } int bpf_map__unpin(struct bpf_map *map, const char *path) { int err; - err = check_path(path); - if (err) - return err; - if (map == NULL) { - pr_warning("invalid map pointer\n"); + pr_warn("invalid map pointer\n"); + return -EINVAL; + } + + if (map->pin_path) { + if (path && strcmp(path, map->pin_path)) { + pr_warn("map '%s' already has pin path '%s' different from '%s'\n", + bpf_map__name(map), map->pin_path, path); + return -EINVAL; + } + path = map->pin_path; + } else if (!path) { + pr_warn("no path to unpin map '%s' from\n", + bpf_map__name(map)); return -EINVAL; } + err = check_path(path); + if (err) + return err; + err = unlink(path); if (err != 0) return -errno; - pr_debug("unpinned map '%s'\n", path); + map->pinned = false; + pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); + + return 0; +} + +int bpf_map__set_pin_path(struct bpf_map *map, const char *path) +{ + char *new = NULL; + + if (path) { + new = strdup(path); + if (!new) + return -errno; + } + + free(map->pin_path); + map->pin_path = new; return 0; } +const char *bpf_map__get_pin_path(const struct bpf_map *map) +{ + return map->pin_path; +} + +bool bpf_map__is_pinned(const struct bpf_map *map) +{ + return map->pinned; +} + int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { struct bpf_map *map; @@ -4002,29 +4486,32 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return -ENOENT; if (!obj->loaded) { - pr_warning("object not yet loaded; load it first\n"); + pr_warn("object not yet loaded; load it first\n"); return -ENOENT; } - err = make_dir(path); - if (err) - return err; - bpf_object__for_each_map(map, obj) { + char *pin_path = NULL; char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) { - err = -EINVAL; - goto err_unpin_maps; - } else if (len >= PATH_MAX) { - err = -ENAMETOOLONG; - goto err_unpin_maps; + if (path) { + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + pin_path = buf; + } else if (!map->pin_path) { + continue; } - err = bpf_map__pin(map, buf); + err = bpf_map__pin(map, pin_path); if (err) goto err_unpin_maps; } @@ -4033,17 +4520,10 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) err_unpin_maps: while ((map = bpf_map__prev(map, obj))) { - char buf[PATH_MAX]; - int len; - - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - continue; - else if (len >= PATH_MAX) + if (!map->pin_path) continue; - bpf_map__unpin(map, buf); + bpf_map__unpin(map, NULL); } return err; @@ -4058,17 +4538,24 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) return -ENOENT; bpf_object__for_each_map(map, obj) { + char *pin_path = NULL; char buf[PATH_MAX]; - int len; - len = snprintf(buf, PATH_MAX, "%s/%s", path, - bpf_map__name(map)); - if (len < 0) - return -EINVAL; - else if (len >= PATH_MAX) - return -ENAMETOOLONG; + if (path) { + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + pin_path = buf; + } else if (!map->pin_path) { + continue; + } - err = bpf_map__unpin(map, buf); + err = bpf_map__unpin(map, pin_path); if (err) return err; } @@ -4085,14 +4572,10 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) return -ENOENT; if (!obj->loaded) { - pr_warning("object not yet loaded; load it first\n"); + pr_warn("object not yet loaded; load it first\n"); return -ENOENT; } - err = make_dir(path); - if (err) - return err; - bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; int len; @@ -4193,6 +4676,7 @@ void bpf_object__close(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); + zfree(&obj->maps[i].pin_path); if (obj->maps[i].clear_priv) obj->maps[i].clear_priv(&obj->maps[i], obj->maps[i].priv); @@ -4236,7 +4720,7 @@ bpf_object__next(struct bpf_object *prev) const char *bpf_object__name(const struct bpf_object *obj) { - return obj ? obj->path : ERR_PTR(-EINVAL); + return obj ? obj->name : ERR_PTR(-EINVAL); } unsigned int bpf_object__kversion(const struct bpf_object *obj) @@ -4286,7 +4770,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, &obj->programs[nr_programs - 1]; if (p->obj != obj) { - pr_warning("error: program handler doesn't match object\n"); + pr_warn("error: program handler doesn't match object\n"); return NULL; } @@ -4349,7 +4833,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) if (needs_copy) { title = strdup(title); if (!title) { - pr_warning("failed to strdup program title\n"); + pr_warn("failed to strdup program title\n"); return ERR_PTR(-ENOMEM); } } @@ -4362,6 +4846,11 @@ int bpf_program__fd(const struct bpf_program *prog) return bpf_program__nth_fd(prog, 0); } +size_t bpf_program__size(const struct bpf_program *prog) +{ + return prog->insns_cnt * sizeof(struct bpf_insn); +} + int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, bpf_program_prep_t prep) { @@ -4371,13 +4860,13 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, return -EINVAL; if (prog->instances.nr > 0 || prog->instances.fds) { - pr_warning("Can't set pre-processor after loading\n"); + pr_warn("Can't set pre-processor after loading\n"); return -EINVAL; } instances_fds = malloc(sizeof(int) * nr_instances); if (!instances_fds) { - pr_warning("alloc memory failed for fds\n"); + pr_warn("alloc memory failed for fds\n"); return -ENOMEM; } @@ -4398,21 +4887,26 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n) return -EINVAL; if (n >= prog->instances.nr || n < 0) { - pr_warning("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->section_name, prog->instances.nr); + pr_warn("Can't get the %dth fd from program %s: only %d instances\n", + n, prog->section_name, prog->instances.nr); return -EINVAL; } fd = prog->instances.fds[n]; if (fd < 0) { - pr_warning("%dth instance of program '%s' is invalid\n", - n, prog->section_name); + pr_warn("%dth instance of program '%s' is invalid\n", + n, prog->section_name); return -ENOENT; } return fd; } +enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog) +{ + return prog->type; +} + void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { prog->type = type; @@ -4446,6 +4940,13 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT); BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); + +enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog) +{ + return prog->expected_attach_type; +} void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) @@ -4453,19 +4954,23 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \ - { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \ + { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype } /* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0) +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) /* Programs that can be attached. */ #define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype) + BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype) /* Programs that must specify expected attach type at load time. */ #define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype) + BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype) + +/* Programs that use BTF to identify attach point */ +#define BPF_PROG_BTF(string, ptype, eatype) \ + BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0) /* Programs that can be attached but attach type can't be identified by section * name. Kept for backward compatibility. @@ -4477,16 +4982,27 @@ static const struct { size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; - int is_attachable; + bool is_attachable; + bool is_attach_btf; enum bpf_attach_type attach_type; } section_names[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_RAW_TP), + BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_FENTRY), + BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, + BPF_TRACE_FEXIT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -4593,14 +5109,105 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, *expected_attach_type = section_names[i].expected_attach_type; return 0; } - pr_warning("failed to guess program type based on ELF section name '%s'\n", name); + pr_warn("failed to guess program type from ELF section '%s'\n", name); type_names = libbpf_get_type_names(false); if (type_names != NULL) { pr_info("supported section(type) names are:%s\n", type_names); free(type_names); } - return -EINVAL; + return -ESRCH; +} + +#define BTF_PREFIX "btf_trace_" +int libbpf_find_vmlinux_btf_id(const char *name, + enum bpf_attach_type attach_type) +{ + struct btf *btf = bpf_core_find_kernel_btf(); + char raw_tp_btf[128] = BTF_PREFIX; + char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; + const char *btf_name; + int err = -EINVAL; + __u32 kind; + + if (IS_ERR(btf)) { + pr_warn("vmlinux BTF is not found\n"); + return -EINVAL; + } + + if (attach_type == BPF_TRACE_RAW_TP) { + /* prepend "btf_trace_" prefix per kernel convention */ + strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); + btf_name = raw_tp_btf; + kind = BTF_KIND_TYPEDEF; + } else { + btf_name = name; + kind = BTF_KIND_FUNC; + } + err = btf__find_by_name_kind(btf, btf_name, kind); + btf__free(btf); + return err; +} + +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info *info; + struct btf *btf = NULL; + int err = -EINVAL; + + info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); + if (IS_ERR_OR_NULL(info_linear)) { + pr_warn("failed get_prog_info_linear for FD %d\n", + attach_prog_fd); + return -EINVAL; + } + info = &info_linear->info; + if (!info->btf_id) { + pr_warn("The target program doesn't have BTF\n"); + goto out; + } + if (btf__get_from_id(info->btf_id, &btf)) { + pr_warn("Failed to get BTF of the program\n"); + goto out; + } + err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); + btf__free(btf); + if (err <= 0) { + pr_warn("%s is not found in prog's BTF\n", name); + goto out; + } +out: + free(info_linear); + return err; +} + +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd) +{ + int i, err; + + if (!name) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (!section_names[i].is_attach_btf) + continue; + if (strncmp(name, section_names[i].sec, section_names[i].len)) + continue; + if (attach_prog_fd) + err = libbpf_find_prog_btf_id(name + section_names[i].len, + attach_prog_fd); + else + err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, + attach_type); + if (err <= 0) + pr_warn("%s is not found in vmlinux BTF\n", name); + return err; + } + pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); + return -ESRCH; } int libbpf_attach_type_by_name(const char *name, @@ -4620,7 +5227,7 @@ int libbpf_attach_type_by_name(const char *name, *attach_type = section_names[i].attach_type; return 0; } - pr_warning("failed to guess attach type based on ELF section name '%s'\n", name); + pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(true); if (type_names != NULL) { pr_info("attachable section(type) names are:%s\n", type_names); @@ -4630,15 +5237,6 @@ int libbpf_attach_type_by_name(const char *name, return -EINVAL; } -static int -bpf_program__identify_section(struct bpf_program *prog, - enum bpf_prog_type *prog_type, - enum bpf_attach_type *expected_attach_type) -{ - return libbpf_prog_type_by_name(prog->section_name, prog_type, - expected_attach_type); -} - int bpf_map__fd(const struct bpf_map *map) { return map ? map->fd : -EINVAL; @@ -4703,11 +5301,11 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) { if (!bpf_map_type__is_map_in_map(map->def.type)) { - pr_warning("error: unsupported map type\n"); + pr_warn("error: unsupported map type\n"); return -EINVAL; } if (map->inner_map_fd != -1) { - pr_warning("error: inner_map_fd already specified\n"); + pr_warn("error: inner_map_fd already specified\n"); return -EINVAL; } map->inner_map_fd = fd; @@ -4727,8 +5325,8 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) e = obj->maps + obj->nr_maps; if ((m < s) || (m >= e)) { - pr_warning("error in %s: map handler doesn't belong to object\n", - __func__); + pr_warn("error in %s: map handler doesn't belong to object\n", + __func__); return NULL; } @@ -4806,8 +5404,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, { struct bpf_object_open_attr open_attr = {}; struct bpf_program *prog, *first_prog = NULL; - enum bpf_attach_type expected_attach_type; - enum bpf_prog_type prog_type; struct bpf_object *obj; struct bpf_map *map; int err; @@ -4825,26 +5421,27 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return -ENOENT; bpf_object__for_each_program(prog, obj) { + enum bpf_attach_type attach_type = attr->expected_attach_type; /* - * If type is not specified, try to guess it based on - * section name. + * to preserve backwards compatibility, bpf_prog_load treats + * attr->prog_type, if specified, as an override to whatever + * bpf_object__open guessed */ - prog_type = attr->prog_type; - prog->prog_ifindex = attr->ifindex; - expected_attach_type = attr->expected_attach_type; - if (prog_type == BPF_PROG_TYPE_UNSPEC) { - err = bpf_program__identify_section(prog, &prog_type, - &expected_attach_type); - if (err < 0) { - bpf_object__close(obj); - return -EINVAL; - } + if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { + bpf_program__set_type(prog, attr->prog_type); + bpf_program__set_expected_attach_type(prog, + attach_type); + } + if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { + /* + * we haven't guessed from section name and user + * didn't provide a fallback type, too bad... + */ + bpf_object__close(obj); + return -EINVAL; } - bpf_program__set_type(prog, prog_type); - bpf_program__set_expected_attach_type(prog, - expected_attach_type); - + prog->prog_ifindex = attr->ifindex; prog->log_level = attr->log_level; prog->prog_flags = attr->prog_flags; if (!first_prog) @@ -4857,7 +5454,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } if (!first_prog) { - pr_warning("object file doesn't contain bpf program\n"); + pr_warn("object file doesn't contain bpf program\n"); bpf_object__close(obj); return -ENOENT; } @@ -4916,14 +5513,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int prog_fd, err; if (pfd < 0) { - pr_warning("program '%s': invalid perf event FD %d\n", - bpf_program__title(prog, false), pfd); + pr_warn("program '%s': invalid perf event FD %d\n", + bpf_program__title(prog, false), pfd); return ERR_PTR(-EINVAL); } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n", - bpf_program__title(prog, false)); + pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", + bpf_program__title(prog, false)); return ERR_PTR(-EINVAL); } @@ -4936,16 +5533,16 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; free(link); - pr_warning("program '%s': failed to attach to pfd %d: %s\n", - bpf_program__title(prog, false), pfd, + pr_warn("program '%s': failed to attach to pfd %d: %s\n", + bpf_program__title(prog, false), pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; free(link); - pr_warning("program '%s': failed to enable pfd %d: %s\n", - bpf_program__title(prog, false), pfd, + pr_warn("program '%s': failed to enable pfd %d: %s\n", + bpf_program__title(prog, false), pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } @@ -5020,9 +5617,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, type = uprobe ? determine_uprobe_perf_type() : determine_kprobe_perf_type(); if (type < 0) { - pr_warning("failed to determine %s perf type: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + pr_warn("failed to determine %s perf type: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); return type; } if (retprobe) { @@ -5030,10 +5627,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, : determine_kprobe_retprobe_bit(); if (bit < 0) { - pr_warning("failed to determine %s retprobe bit: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, - sizeof(errmsg))); + pr_warn("failed to determine %s retprobe bit: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); return bit; } attr.config |= 1 << bit; @@ -5050,9 +5646,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warning("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("%s perf_event_open() failed: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return err; } return pfd; @@ -5069,20 +5665,20 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, 0 /* offset */, -1 /* pid */); if (pfd < 0) { - pr_warning("program '%s': failed to create %s '%s' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link = bpf_program__attach_perf_event(prog, pfd); if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warning("program '%s': failed to attach to %s '%s': %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to %s '%s': %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } return link; @@ -5100,22 +5696,22 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid); if (pfd < 0) { - pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", - binary_path, func_offset, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link = bpf_program__attach_perf_event(prog, pfd); if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", - binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } return link; @@ -5149,9 +5745,9 @@ static int perf_event_open_tracepoint(const char *tp_category, tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { - pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n", - tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", + tp_category, tp_name, + libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); return tp_id; } @@ -5163,9 +5759,9 @@ static int perf_event_open_tracepoint(const char *tp_category, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n", - tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return err; } return pfd; @@ -5181,20 +5777,20 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, pfd = perf_event_open_tracepoint(tp_category, tp_name); if (pfd < 0) { - pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link = bpf_program__attach_perf_event(prog, pfd); if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } return link; @@ -5216,8 +5812,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warning("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); return ERR_PTR(-EINVAL); } @@ -5230,9 +5826,40 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, if (pfd < 0) { pfd = -errno; free(link); - pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n", - bpf_program__title(prog, false), tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", + bpf_program__title(prog, false), tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + return ERR_PTR(pfd); + } + link->fd = pfd; + return (struct bpf_link *)link; +} + +struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +{ + char errmsg[STRERR_BUFSIZE]; + struct bpf_link_fd *link; + int prog_fd, pfd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); + return ERR_PTR(-EINVAL); + } + + link = malloc(sizeof(*link)); + if (!link) + return ERR_PTR(-ENOMEM); + link->link.destroy = &bpf_link__destroy_fd; + + pfd = bpf_raw_tracepoint_open(NULL, prog_fd); + if (pfd < 0) { + pfd = -errno; + free(link); + pr_warn("program '%s': failed to attach to trace: %s\n", + bpf_program__title(prog, false), + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link->fd = pfd; @@ -5334,7 +5961,7 @@ static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, return; if (cpu_buf->base && munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) - pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); + pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); if (cpu_buf->fd >= 0) { ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); close(cpu_buf->fd); @@ -5384,8 +6011,8 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, -1, PERF_FLAG_FD_CLOEXEC); if (cpu_buf->fd < 0) { err = -errno; - pr_warning("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to open perf buffer event on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5395,15 +6022,15 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->base == MAP_FAILED) { cpu_buf->base = NULL; err = -errno; - pr_warning("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; - pr_warning("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5463,8 +6090,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, int err, i; if (page_cnt & (page_cnt - 1)) { - pr_warning("page count should be power of two, but is %zu\n", - page_cnt); + pr_warn("page count should be power of two, but is %zu\n", + page_cnt); return ERR_PTR(-EINVAL); } @@ -5472,14 +6099,14 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); if (err) { err = -errno; - pr_warning("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to get map info for map FD %d: %s\n", + map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); return ERR_PTR(err); } if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { - pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", - map.name); + pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", + map.name); return ERR_PTR(-EINVAL); } @@ -5499,8 +6126,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (pb->epoll_fd < 0) { err = -errno; - pr_warning("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to create epoll instance: %s\n", + libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5519,13 +6146,13 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); if (!pb->events) { err = -ENOMEM; - pr_warning("failed to allocate events: out of memory\n"); + pr_warn("failed to allocate events: out of memory\n"); goto error; } pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); if (!pb->cpu_bufs) { err = -ENOMEM; - pr_warning("failed to allocate buffers: out of memory\n"); + pr_warn("failed to allocate buffers: out of memory\n"); goto error; } @@ -5548,9 +6175,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, &cpu_buf->fd, 0); if (err) { err = -errno; - pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n", - cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", + cpu, map_key, cpu_buf->fd, + libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5559,9 +6186,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, &pb->events[i]) < 0) { err = -errno; - pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n", - cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", + cpu, cpu_buf->fd, + libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } } @@ -5614,7 +6241,7 @@ perf_buffer__process_record(struct perf_event_header *e, void *ctx) break; } default: - pr_warning("unknown perf sample type %d\n", e->type); + pr_warn("unknown perf sample type %d\n", e->type); return LIBBPF_PERF_EVENT_ERROR; } return LIBBPF_PERF_EVENT_CONT; @@ -5644,7 +6271,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warning("error while processing records: %d\n", err); + pr_warn("error while processing records: %d\n", err); return err; } } @@ -5708,7 +6335,8 @@ static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { }; -static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset) +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, + int offset) { __u32 *array = (__u32 *)info; @@ -5717,7 +6345,8 @@ static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offse return -(int)offset; } -static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset) +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, + int offset) { __u64 *array = (__u64 *)info; @@ -5841,13 +6470,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays) v2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); if (v1 != v2) - pr_warning("%s: mismatch in element count\n", __func__); + pr_warn("%s: mismatch in element count\n", __func__); v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); v2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->size_offset); if (v1 != v2) - pr_warning("%s: mismatch in rec size\n", __func__); + pr_warn("%s: mismatch in rec size\n", __func__); } /* step 7: update info_len and data_len */ @@ -5915,20 +6544,19 @@ int libbpf_num_possible_cpus(void) fd = open(fcpu, O_RDONLY); if (fd < 0) { error = errno; - pr_warning("Failed to open file %s: %s\n", - fcpu, strerror(error)); + pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); return -error; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { error = len ? errno : EINVAL; - pr_warning("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); + pr_warn("Failed to read # of possible cpus from %s: %s\n", + fcpu, strerror(error)); return -error; } if (len == sizeof(buf)) { - pr_warning("File %s size overflow\n", fcpu); + pr_warn("File %s size overflow\n", fcpu); return -EOVERFLOW; } buf[len] = '\0'; @@ -5939,8 +6567,8 @@ int libbpf_num_possible_cpus(void) buf[ir] = '\0'; n = sscanf(&buf[il], "%u-%u", &start, &end); if (n <= 0) { - pr_warning("Failed to get # CPUs from %s\n", - &buf[il]); + pr_warn("Failed to get # CPUs from %s\n", + &buf[il]); return -EINVAL; } else if (n == 1) { end = start; @@ -5950,7 +6578,7 @@ int libbpf_num_possible_cpus(void) } } if (tmp_cpus <= 0) { - pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); + pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e8f70977d137..0dbf4bfba0c4 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -67,18 +67,80 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. + * + * Macro declares opts struct of given type and name, zero-initializes, + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. + */ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + }; \ + }) + +struct bpf_object_open_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* object name override, if provided: + * - for object open from file, this will override setting object + * name from file path's base name; + * - for object open from memory buffer, this will specify an object + * name and will override default "<addr>-<buf-size>" name; + */ + const char *object_name; + /* parse map definitions non-strictly, allowing extra attributes/data */ + bool relaxed_maps; + /* process CO-RE relocations non-strictly, allowing them to fail */ + bool relaxed_core_relocs; + /* maps that set the 'pinning' attribute in their definition will have + * their pin_path attribute set to a file in this directory, and be + * auto-pinned to that path on load; defaults to "/sys/fs/bpf". + */ + const char *pin_root_path; + __u32 attach_prog_fd; +}; +#define bpf_object_open_opts__last_field attach_prog_fd + LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +LIBBPF_API struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, + struct bpf_object_open_opts *opts); + +/* deprecated bpf_object__open variants */ +LIBBPF_API struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, + const char *name); +LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); -struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, - int flags); -LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, - size_t obj_buf_sz, - const char *name); + int bpf_object__section_size(const struct bpf_object *obj, const char *name, __u32 *size); int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); + +enum libbpf_pin_type { + LIBBPF_PIN_NONE, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + LIBBPF_PIN_BY_NAME, +}; + +/* pin_maps and unpin_maps can both be called with a NULL path, in which case + * they will use the pin_path attribute of each map (and ignore all maps that + * don't have a pin_path set). + */ LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, const char *path); @@ -127,6 +189,8 @@ libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type); LIBBPF_API int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type); +LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, + enum bpf_attach_type attach_type); /* Accessors of bpf_program */ struct bpf_program; @@ -153,6 +217,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); +/* returns program size in bytes */ +LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); + LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_version); LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); @@ -187,6 +254,8 @@ LIBBPF_API struct bpf_link * bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_trace(struct bpf_program *prog); struct bpf_insn; /* @@ -262,8 +331,14 @@ LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); + +LIBBPF_API enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); @@ -276,6 +351,7 @@ LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog); LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog); /* * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -335,6 +411,9 @@ LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); @@ -356,8 +435,18 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); +struct xdp_link_info { + __u32 prog_id; + __u32 drv_prog_id; + __u32 hw_prog_id; + __u32 skb_prog_id; + __u8 attach_mode; +}; + LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags); struct perf_buffer; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d04c7cb623ed..8ddc2c40e482 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -190,3 +190,21 @@ LIBBPF_0.0.5 { global: bpf_btf_get_next_id; } LIBBPF_0.0.4; + +LIBBPF_0.0.6 { + global: + bpf_get_link_xdp_info; + bpf_map__get_pin_path; + bpf_map__is_pinned; + bpf_map__set_pin_path; + bpf_object__open_file; + bpf_object__open_mem; + bpf_program__attach_trace; + bpf_program__get_expected_attach_type; + bpf_program__get_type; + bpf_program__is_tracing; + bpf_program__set_tracing; + bpf_program__size; + btf__find_by_name_kind; + libbpf_find_vmlinux_btf_id; +} LIBBPF_0.0.5; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 2e83a34f8c79..97ac17a64a58 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -34,6 +34,22 @@ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD)) #endif +/* Symbol versioning is different between static and shared library. + * Properly versioned symbols are needed for shared library, but + * only the symbol of the new version is needed for static library. + */ +#ifdef SHARED +# define COMPAT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@" #version); +# define DEFAULT_VERSION(internal_name, api_name, version) \ + asm(".symver " #internal_name "," #api_name "@@" #version); +#else +# define COMPAT_VERSION(internal_name, api_name, version) +# define DEFAULT_VERSION(internal_name, api_name, version) \ + extern typeof(internal_name) api_name \ + __attribute__((alias(#internal_name))); +#endif + extern void libbpf_print(enum libbpf_print_level level, const char *format, ...) __attribute__((format(printf, 2, 3))); @@ -43,10 +59,42 @@ do { \ libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ } while (0) -#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +static inline bool libbpf_validate_opts(const char *opts, + size_t opts_sz, size_t user_sz, + const char *type_name) +{ + if (user_sz < sizeof(size_t)) { + pr_warn("%s size (%zu) is too small\n", type_name, user_sz); + return false; + } + if (user_sz > opts_sz) { + size_t i; + + for (i = opts_sz; i < user_sz; i++) { + if (opts[i]) { + pr_warn("%s has non-zero extra bytes", + type_name); + return false; + } + } + } + return true; +} + +#define OPTS_VALID(opts, type) \ + (!(opts) || libbpf_validate_opts((const char *)opts, \ + offsetofend(struct type, \ + type##__last_field), \ + (opts)->sz, #type)) +#define OPTS_HAS(opts, field) \ + ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) +#define OPTS_GET(opts, field, fallback_value) \ + (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) + int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); @@ -78,7 +126,7 @@ struct btf_ext { }; struct btf_ext_info func_info; struct btf_ext_info line_info; - struct btf_ext_info offset_reloc_info; + struct btf_ext_info field_reloc_info; __u32 data_size; }; @@ -103,13 +151,27 @@ struct bpf_line_info_min { __u32 line_col; }; -/* The minimum bpf_offset_reloc checked by the loader +/* bpf_field_info_kind encodes which aspect of captured field has to be + * adjusted by relocations. Currently supported values are: + * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); + * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); + */ +enum bpf_field_info_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, + BPF_FIELD_LSHIFT_U64 = 4, + BPF_FIELD_RSHIFT_U64 = 5, +}; + +/* The minimum bpf_field_reloc checked by the loader * - * Offset relocation captures the following data: + * Field relocation captures the following data: * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual offset; + * its insn->imm field to be relocated with actual field info; * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * offset; + * field; * - access_str_off - offset into corresponding .BTF string section. String * itself encodes an accessed field using a sequence of field and array * indicies, separated by colon (:). It's conceptually very close to LLVM's @@ -140,15 +202,16 @@ struct bpf_line_info_min { * bpf_probe_read(&dst, sizeof(dst), * __builtin_preserve_access_index(&src->a.b.c)); * - * In this case Clang will emit offset relocation recording necessary data to + * In this case Clang will emit field relocation recording necessary data to * be able to find offset of embedded `a.b.c` field within `src` struct. * * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction */ -struct bpf_offset_reloc { +struct bpf_field_reloc { __u32 insn_off; __u32 type_id; __u32 access_str_off; + enum bpf_field_info_kind kind; }; #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 4b0b0364f5fc..a9eb8b322671 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_TRACING: default: break; } diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index ce3ec81b71c0..5065c1aa1061 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -12,6 +12,7 @@ #include "bpf.h" #include "libbpf.h" +#include "libbpf_internal.h" #include "nlattr.h" #ifndef SOL_NETLINK @@ -24,7 +25,7 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, struct xdp_id_md { int ifindex; __u32 flags; - __u32 id; + struct xdp_link_info info; }; int libbpf_netlink_open(__u32 *nl_pid) @@ -43,7 +44,7 @@ int libbpf_netlink_open(__u32 *nl_pid) if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)) < 0) { - fprintf(stderr, "Netlink error reporting not supported\n"); + pr_warn("Netlink error reporting not supported\n"); } if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { @@ -202,26 +203,11 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, return dump_link_nlmsg(cookie, ifi, tb); } -static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) -{ - if (mode != XDP_ATTACHED_MULTI) - return IFLA_XDP_PROG_ID; - if (flags & XDP_FLAGS_DRV_MODE) - return IFLA_XDP_DRV_PROG_ID; - if (flags & XDP_FLAGS_HW_MODE) - return IFLA_XDP_HW_PROG_ID; - if (flags & XDP_FLAGS_SKB_MODE) - return IFLA_XDP_SKB_PROG_ID; - - return IFLA_XDP_UNSPEC; -} - -static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) { struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; struct xdp_id_md *xdp_id = cookie; struct ifinfomsg *ifinfo = msg; - unsigned char mode, xdp_attr; int ret; if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) @@ -237,27 +223,40 @@ static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) if (!xdp_tb[IFLA_XDP_ATTACHED]) return 0; - mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); - if (mode == XDP_ATTACHED_NONE) - return 0; + xdp_id->info.attach_mode = libbpf_nla_getattr_u8( + xdp_tb[IFLA_XDP_ATTACHED]); - xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); - if (!xdp_attr || !xdp_tb[xdp_attr]) + if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE) return 0; - xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); + if (xdp_tb[IFLA_XDP_PROG_ID]) + xdp_id->info.prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_PROG_ID]); + + if (xdp_tb[IFLA_XDP_SKB_PROG_ID]) + xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_SKB_PROG_ID]); + + if (xdp_tb[IFLA_XDP_DRV_PROG_ID]) + xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_DRV_PROG_ID]); + + if (xdp_tb[IFLA_XDP_HW_PROG_ID]) + xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32( + xdp_tb[IFLA_XDP_HW_PROG_ID]); return 0; } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, + size_t info_size, __u32 flags) { struct xdp_id_md xdp_id = {}; int sock, ret; __u32 nl_pid; __u32 mask; - if (flags & ~XDP_FLAGS_MASK) + if (flags & ~XDP_FLAGS_MASK || !info_size) return -EINVAL; /* Check whether the single {HW,DRV,SKB} mode is set */ @@ -273,14 +272,44 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) xdp_id.ifindex = ifindex; xdp_id.flags = flags; - ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); - if (!ret) - *prog_id = xdp_id.id; + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); + if (!ret) { + size_t sz = min(info_size, sizeof(xdp_id.info)); + + memcpy(info, &xdp_id.info, sz); + memset((void *) info + sz, 0, info_size - sz); + } close(sock); return ret; } +static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +{ + if (info->attach_mode != XDP_ATTACHED_MULTI) + return info->prog_id; + if (flags & XDP_FLAGS_DRV_MODE) + return info->drv_prog_id; + if (flags & XDP_FLAGS_HW_MODE) + return info->hw_prog_id; + if (flags & XDP_FLAGS_SKB_MODE) + return info->skb_prog_id; + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_link_info info; + int ret; + + ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); + if (!ret) + *prog_id = get_xdp_id(&info, flags); + + return ret; +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 1e69c0c8d413..8db44bbfc66d 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -8,6 +8,7 @@ #include <errno.h> #include "nlattr.h" +#include "libbpf_internal.h" #include <linux/rtnetlink.h> #include <string.h> #include <stdio.h> @@ -121,8 +122,8 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, } if (tb[type]) - fprintf(stderr, "Attribute of type %#x found multiple times in message, " - "previous attribute is being ignored.\n", type); + pr_warn("Attribute of type %#x found multiple times in message, " + "previous attribute is being ignored.\n", type); tb[type] = nla; } @@ -181,15 +182,14 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh) if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) { - fprintf(stderr, - "Failed to parse extended error attributes\n"); + pr_warn("Failed to parse extended error attributes\n"); return 0; } if (tb[NLMSGERR_ATTR_MSG]) errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); - fprintf(stderr, "Kernel error message: %s\n", errmsg); + pr_warn("Kernel error message: %s\n", errmsg); return 0; } diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.c index fc134873bb6d..f0eb2727b766 100644 --- a/tools/lib/bpf/test_libbpf.cpp +++ b/tools/lib/bpf/test_libbpf.c @@ -7,12 +7,14 @@ int main(int argc, char *argv[]) { - /* libbpf.h */ - libbpf_set_print(NULL); + /* libbpf.h */ + libbpf_set_print(NULL); - /* bpf.h */ - bpf_prog_get_fd_by_id(0); + /* bpf.h */ + bpf_prog_get_fd_by_id(0); - /* btf.h */ - btf__new(NULL, 0); + /* btf.h */ + btf__new(NULL, 0); + + return 0; } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 24fa313524fb..8e0ffa800a71 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -73,6 +73,21 @@ struct xsk_nl_info { int fd; }; +/* Up until and including Linux 5.3 */ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + int xsk_umem__fd(const struct xsk_umem *umem) { return umem ? umem->fd : -EINVAL; @@ -133,6 +148,58 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, return 0; } +static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) +{ + struct xdp_mmap_offsets_v1 off_v1; + + /* getsockopt on a kernel <= 5.3 has no flags fields. + * Copy over the offsets to the correct places in the >=5.4 format + * and put the flags where they would have been on that kernel. + */ + memcpy(&off_v1, off, sizeof(off_v1)); + + off->rx.producer = off_v1.rx.producer; + off->rx.consumer = off_v1.rx.consumer; + off->rx.desc = off_v1.rx.desc; + off->rx.flags = off_v1.rx.consumer + sizeof(__u32); + + off->tx.producer = off_v1.tx.producer; + off->tx.consumer = off_v1.tx.consumer; + off->tx.desc = off_v1.tx.desc; + off->tx.flags = off_v1.tx.consumer + sizeof(__u32); + + off->fr.producer = off_v1.fr.producer; + off->fr.consumer = off_v1.fr.consumer; + off->fr.desc = off_v1.fr.desc; + off->fr.flags = off_v1.fr.consumer + sizeof(__u32); + + off->cr.producer = off_v1.cr.producer; + off->cr.consumer = off_v1.cr.consumer; + off->cr.desc = off_v1.cr.desc; + off->cr.flags = off_v1.cr.consumer + sizeof(__u32); +} + +static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) +{ + socklen_t optlen; + int err; + + optlen = sizeof(*off); + err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); + if (err) + return err; + + if (optlen == sizeof(*off)) + return 0; + + if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { + xsk_mmap_offsets_v1(off); + return 0; + } + + return -EINVAL; +} + int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, @@ -141,7 +208,6 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, struct xdp_mmap_offsets off; struct xdp_umem_reg mr; struct xsk_umem *umem; - socklen_t optlen; void *map; int err; @@ -163,6 +229,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, umem->umem_area = umem_area; xsk_set_umem_config(&umem->config, usr_config); + memset(&mr, 0, sizeof(mr)); mr.addr = (uintptr_t)umem_area; mr.len = size; mr.chunk_size = umem->config.frame_size; @@ -189,8 +256,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area, goto out_socket; } - optlen = sizeof(off); - err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(umem->fd, &off); if (err) { err = -errno; goto out_socket; @@ -261,8 +327,8 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area, return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp, &config); } -asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2"); -asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4"); +COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2) +DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4) static int xsk_load_xdp_prog(struct xsk_socket *xsk) { @@ -273,33 +339,55 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* This is the C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { - * int index = ctx->rx_queue_index; + * int ret, index = ctx->rx_queue_index; * * // A set entry here means that the correspnding queue_id * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * if (ret > 0) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. * if (bpf_map_lookup_elem(&xsks_map, &index)) * return bpf_redirect_map(&xsks_map, index, 0); - * * return XDP_PASS; * } */ struct bpf_insn prog[] = { - /* r1 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r1 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* if w0 != 0 goto pc+13 */ + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), + /* r2 = r10 */ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* call bpf_map_lookup_elem */ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV32_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto +5 */ + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), /* r2 = *(u32 *)(r10 - 4) */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - BPF_MOV32_IMM(BPF_REG_3, 0), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ BPF_EMIT_CALL(BPF_FUNC_redirect_map), /* The jumps are to this instruction */ BPF_EXIT_INSN(), @@ -310,7 +398,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) "LGPL-2.1 or BSD-2-Clause", 0, log_buf, log_buf_size); if (prog_fd < 0) { - pr_warning("BPF log buffer:\n%s", log_buf); + pr_warn("BPF log buffer:\n%s", log_buf); return prog_fd; } @@ -343,13 +431,18 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) goto out; } - if (err || channels.max_combined == 0) + if (err) { /* If the device says it has no channels, then all traffic * is sent to a single stream, so max queues = 1. */ ret = 1; - else - ret = channels.max_combined; + } else { + /* Take the max of rx, tx, combined. Drivers return + * the number of channels in different ways. + */ + ret = max(channels.max_rx, channels.max_tx); + ret = max(ret, (int)channels.max_combined); + } out: close(fd); @@ -465,6 +558,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } else { xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (xsk->prog_fd < 0) + return -errno; err = xsk_lookup_bpf_maps(xsk); if (err) { close(xsk->prog_fd); @@ -472,7 +567,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk) } } - err = xsk_set_bpf_maps(xsk); + if (xsk->rx) + err = xsk_set_bpf_maps(xsk); if (err) { xsk_delete_bpf_maps(xsk); close(xsk->prog_fd); @@ -491,21 +587,26 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; struct xsk_socket *xsk; - socklen_t optlen; int err; - if (!umem || !xsk_ptr || !rx || !tx) + if (!umem || !xsk_ptr || !(rx || tx)) return -EFAULT; - if (umem->refcount) { - pr_warning("Error: shared umems not supported by libbpf.\n"); - return -EBUSY; - } - xsk = calloc(1, sizeof(*xsk)); if (!xsk) return -ENOMEM; + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_xsk_alloc; + + if (umem->refcount && + !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); + err = -EBUSY; + goto out_xsk_alloc; + } + if (umem->refcount++ > 0) { xsk->fd = socket(AF_XDP, SOCK_RAW, 0); if (xsk->fd < 0) { @@ -527,10 +628,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); xsk->ifname[IFNAMSIZ - 1] = '\0'; - err = xsk_set_xdp_socket_config(&xsk->config, usr_config); - if (err) - goto out_socket; - if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, &xsk->config.rx_size, @@ -550,8 +647,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } } - optlen = sizeof(off); - err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(xsk->fd, &off); if (err) { err = -errno; goto out_socket; @@ -599,7 +695,12 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = xsk->ifindex; sxdp.sxdp_queue_id = xsk->queue_id; - sxdp.sxdp_flags = xsk->config.bind_flags; + if (umem->refcount > 1) { + sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = xsk->config.bind_flags; + } err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); if (err) { @@ -637,7 +738,6 @@ out_xsk_alloc: int xsk_umem__delete(struct xsk_umem *umem) { struct xdp_mmap_offsets off; - socklen_t optlen; int err; if (!umem) @@ -646,8 +746,7 @@ int xsk_umem__delete(struct xsk_umem *umem) if (umem->refcount) return -EBUSY; - optlen = sizeof(off); - err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(umem->fd, &off); if (!err) { munmap(umem->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * sizeof(__u64)); @@ -665,7 +764,6 @@ void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); struct xdp_mmap_offsets off; - socklen_t optlen; int err; if (!xsk) @@ -676,8 +774,7 @@ void xsk_socket__delete(struct xsk_socket *xsk) close(xsk->prog_fd); } - optlen = sizeof(off); - err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + err = xsk_get_mmap_offsets(xsk->fd, &off); if (!err) { if (xsk->rx) { munmap(xsk->rx->ring - off.rx.desc, |