24 files changed, 433 insertions, 104 deletions
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 1c89017beebb..8cdc4415fa70 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -377,6 +377,7 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
     unsigned int max_len;
     struct veth_rq *rq;

+    rcu_read_lock();
     if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
         ret = -EINVAL;
         goto drop;
@@ -418,11 +419,14 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
     if (flags & XDP_XMIT_FLUSH)
         __veth_xdp_flush(rq);

-    if (likely(!drops))
+    if (likely(!drops)) {
+        rcu_read_unlock();
         return n;
+    }

     ret = n - drops;
 drop:
+    rcu_read_unlock();
     atomic64_add(drops, &priv->dropped);

     return ret;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c458cd313281..2fe7a3188282 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -501,7 +501,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
     /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
      * indicate XDP resources have been successfully allocated.
      */
-    xdp_prog = rcu_dereference(rq->xdp_prog);
+    xdp_prog = rcu_access_pointer(rq->xdp_prog);
     if (!xdp_prog)
         return -ENXIO;
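The virtio_net hunk above swaps rcu_dereference() for rcu_access_pointer() because the pointer is only tested against NULL and never dereferenced; rcu_access_pointer() is the primitive that carries no requirement to be inside an RCU read-side critical section. A minimal kernel-style sketch of the distinction follows; the my_dev structure and its field are invented for illustration, while the RCU primitives and bpf_prog_run_xdp() are real kernel APIs.

    /* Hypothetical device private struct, mirroring the pattern above. */
    struct my_dev {
        struct bpf_prog __rcu *xdp_prog;
    };

    static bool my_dev_has_prog(struct my_dev *dev)
    {
        /* NULL check only, no dereference: no rcu_read_lock() needed. */
        return rcu_access_pointer(dev->xdp_prog) != NULL;
    }

    static u32 my_dev_run_prog(struct my_dev *dev, struct xdp_buff *xdp)
    {
        struct bpf_prog *prog;
        u32 act = XDP_PASS;

        rcu_read_lock();
        /* The pointer is dereferenced here, so rcu_dereference() under
         * rcu_read_lock() is required.
         */
        prog = rcu_dereference(dev->xdp_prog);
        if (prog)
            act = bpf_prog_run_xdp(prog, xdp);
        rcu_read_unlock();
        return act;
    }

The veth change is the complementary case: veth_xdp_xmit() does dereference per-queue state, so it now takes rcu_read_lock() around the whole transmit path.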
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a9687861fd7e..8e9ad3943cd9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_INIT(name) {            \
     .mutex = __MUTEX_INITIALIZER(name.mutex),  \
     .func = &name##func,                       \
@@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void);
 #define BPF_DISPATCHER_PTR(name) (&name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
                 struct bpf_prog *to);
+struct bpf_image {
+    struct latch_tree_node tnode;
+    unsigned char data[];
+};
+#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
+bool is_bpf_image_address(unsigned long address);
+void *bpf_image_alloc(void);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
                           struct bpf_prog *from,
                           struct bpf_prog *to) {}
+static inline bool is_bpf_image_address(unsigned long address)
+{
+    return false;
+}
 #endif

 struct bpf_func_info_aux {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 32963b6d5a9c..b7c1660fb594 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
     }
 }

+static bool is_string_ptr(struct btf *btf, const struct btf_type *t)
+{
+    /* t comes in already as a pointer */
+    t = btf_type_by_id(btf, t->type);
+
+    /* allow const */
+    if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
+        t = btf_type_by_id(btf, t->type);
+
+    /* char, signed char, unsigned char */
+    return btf_type_is_int(t) && t->size == 1;
+}
+
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
             const struct bpf_prog *prog,
             struct bpf_insn_access_aux *info)
@@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
          */
         return true;

+    if (is_string_ptr(btf, t))
+        return true;
+
     /* this is a pointer to another type */
     info->reg_type = PTR_TO_BTF_ID;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index de630f980282..58bdca5d978a 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -190,10 +190,12 @@ static void dev_map_free(struct bpf_map *map)
     /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
      * so the programs (can be more than one that used this map) were
-     * disconnected from events. Wait for outstanding critical sections in
-     * these programs to complete. The rcu critical section only guarantees
-     * no further reads against netdev_map. It does __not__ ensure pending
-     * flush operations (if any) are complete.
+     * disconnected from events. The following synchronize_rcu() guarantees
+     * both rcu read critical sections complete and waits for
+     * preempt-disable regions (NAPI being the relevant context here) so we
+     * are certain there will be no further reads against the netdev_map and
+     * all flush operations are complete. Flush operations can only be done
+     * from NAPI context for this reason.
      */

     spin_lock(&dev_map_lock);
@@ -263,7 +265,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
     struct hlist_head *head = dev_map_index_hash(dtab, key);
     struct bpf_dtab_netdev *dev;

-    hlist_for_each_entry_rcu(dev, head, index_hlist)
+    hlist_for_each_entry_rcu(dev, head, index_hlist,
+                 lockdep_is_held(&dtab->index_lock))
         if (dev->idx == key)
             return dev;

@@ -363,16 +366,17 @@ error:
  * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the
  * net device can be torn down. On devmap tear down we ensure the flush list
  * is empty before completing to ensure all flush operations have completed.
+ * When drivers update the bpf program they may need to ensure any flush ops
+ * are also complete. Using synchronize_rcu or call_rcu will suffice for this
+ * because both wait for napi context to exit.
  */
 void __dev_flush(void)
 {
     struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
     struct xdp_dev_bulk_queue *bq, *tmp;

-    rcu_read_lock();
     list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
         bq_xmit_all(bq, XDP_XMIT_FLUSH);
-    rcu_read_unlock();
 }

 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
@@ -502,12 +506,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
         return -EINVAL;

     /* Use call_rcu() here to ensure any rcu critical sections have
-     * completed, but this does not guarantee a flush has happened
-     * yet. Because driver side rcu_read_lock/unlock only protects the
-     * running XDP program. However, for pending flush operations the
-     * dev and ctx are stored in another per cpu map. And additionally,
-     * the driver tear down ensures all soft irqs are complete before
-     * removing the net device in the case of dev_put equals zero.
+     * completed as well as any flush operations because call_rcu
+     * will wait for preempt-disable region to complete, NAPI in this
+     * context. And additionally, the driver tear down ensures all
+     * soft irqs are complete before removing the net device in the
+     * case of dev_put equals zero.
      */
     old_dev = xchg(&dtab->netdev_map[k], NULL);
     if (old_dev)
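The __dev_map_hash_lookup_elem() hunk above uses the four-argument form of hlist_for_each_entry_rcu(), whose last argument is a lockdep expression declaring that the traversal is also legal while the update-side lock is held, not just under the RCU read lock. A hedged sketch of the pattern, with the tbl/entry types invented for illustration:

    struct tbl {
        struct hlist_head head;
        spinlock_t lock;        /* update-side lock */
    };

    struct entry {
        struct hlist_node node;
        u32 key;
    };

    /* Safe under either rcu_read_lock() or t->lock; lockdep verifies both. */
    static struct entry *tbl_lookup(struct tbl *t, u32 key)
    {
        struct entry *e;

        hlist_for_each_entry_rcu(e, &t->head, node,
                     lockdep_is_held(&t->lock))
            if (e->key == key)
                return e;
        return NULL;
    }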
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index 204ee61a3904..b3e5b214fed8 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
         noff = 0;
     } else {
         old = d->image + d->image_off;
-        noff = d->image_off ^ (PAGE_SIZE / 2);
+        noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
     }

     new = d->num_progs ? d->image + noff : NULL;
@@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,

     mutex_lock(&d->mutex);
     if (!d->image) {
-        d->image = bpf_jit_alloc_exec_page();
+        d->image = bpf_image_alloc();
         if (!d->image)
             goto out;
     }
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index e11059b99f18..bd2fd8eab470 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -196,6 +196,7 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
     void *key = map_iter(m)->key;
     void *prev_key;

+    (*pos)++;
     if (map_iter(m)->done)
         return NULL;

@@ -208,8 +209,6 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
         map_iter(m)->done = true;
         return NULL;
     }
-
-    ++(*pos);
     return key;
 }
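The inode.c hunk moves the (*pos)++ to the top of map_seq_next() because the seq_file contract requires ->next() to advance *pos unconditionally, including on the call that returns NULL; otherwise the seq_file core may invoke ->next() again at an unchanged position and the iterator can repeat or skip elements. A minimal sketch of the required shape, with an invented array-backed container:

    /* Sketch of a well-formed seq_file ->next(): advance *pos first. */
    static void *arr_seq_next(struct seq_file *m, void *v, loff_t *pos)
    {
        struct arr *a = m->private;     /* hypothetical container */

        (*pos)++;                       /* always, even when stopping */
        if (*pos >= a->nr_items)
            return NULL;
        return &a->items[*pos];
    }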
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index eb64c245052b..6b264a92064b 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -4,6 +4,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ftrace.h>
+#include <linux/rbtree_latch.h>

 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = {
 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

 static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
+static struct latch_tree_root image_tree __cacheline_aligned;

-/* serializes access to trampoline_table */
+/* serializes access to trampoline_table and image_tree */
 static DEFINE_MUTEX(trampoline_mutex);

-void *bpf_jit_alloc_exec_page(void)
+static void *bpf_jit_alloc_exec_page(void)
 {
     void *image;

@@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void)
     return image;
 }

+static __always_inline bool image_tree_less(struct latch_tree_node *a,
+                      struct latch_tree_node *b)
+{
+    struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
+    struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
+
+    return ia < ib;
+}
+
+static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+{
+    void *image = container_of(n, struct bpf_image, tnode);
+
+    if (addr < image)
+        return -1;
+    if (addr >= image + PAGE_SIZE)
+        return 1;
+
+    return 0;
+}
+
+static const struct latch_tree_ops image_tree_ops = {
+    .less = image_tree_less,
+    .comp = image_tree_comp,
+};
+
+static void *__bpf_image_alloc(bool lock)
+{
+    struct bpf_image *image;
+
+    image = bpf_jit_alloc_exec_page();
+    if (!image)
+        return NULL;
+
+    if (lock)
+        mutex_lock(&trampoline_mutex);
+    latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
+    if (lock)
+        mutex_unlock(&trampoline_mutex);
+    return image->data;
+}
+
+void *bpf_image_alloc(void)
+{
+    return __bpf_image_alloc(true);
+}
+
+bool is_bpf_image_address(unsigned long addr)
+{
+    bool ret;
+
+    rcu_read_lock();
+    ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
+    rcu_read_unlock();
+
+    return ret;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
     struct bpf_trampoline *tr;
@@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
         goto out;

     /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-    image = bpf_jit_alloc_exec_page();
+    image = __bpf_image_alloc(false);
     if (!image) {
         kfree(tr);
         tr = NULL;
@@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
 }

 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86. Pick a number to fit into PAGE_SIZE / 2
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
  */
 #define BPF_MAX_TRAMP_PROGS 40

 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-    void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-    void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+    void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
+    void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
     struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
     int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
     int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
@@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
      */
     synchronize_rcu_tasks();

-    err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+    err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
                       &tr->func.model, flags,
                       fentry, fentry_cnt,
                       fexit, fexit_cnt,
@@ -284,6 +344,8 @@ out:

 void bpf_trampoline_put(struct bpf_trampoline *tr)
 {
+    struct bpf_image *image;
+
     if (!tr)
         return;
     mutex_lock(&trampoline_mutex);
@@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
         goto out;
     if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
         goto out;
+    image = container_of(tr->image, struct bpf_image, data);
+    latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
     /* wait for tasks to get out of trampoline before freeing it */
     synchronize_rcu_tasks();
-    bpf_jit_free_exec(tr->image);
+    bpf_jit_free_exec(image);
     hlist_del(&tr->hlist);
     kfree(tr);
 out:
diff --git a/kernel/extable.c b/kernel/extable.c
index f6920a11e28a..a0024f27d3a1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr)
      * triggers a stack trace, or a WARN() that happens during
      * coming back from idle, or cpu on or offlining.
      *
-     * is_module_text_address() as well as the kprobe slots
-     * and is_bpf_text_address() require RCU to be watching.
+     * is_module_text_address() as well as the kprobe slots,
+     * is_bpf_text_address() and is_bpf_image_address require
+     * RCU to be watching.
      */
     no_rcu = !rcu_is_watching();

@@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr)
         goto out;
     if (is_bpf_text_address(addr))
         goto out;
+    if (is_bpf_image_address(addr))
+        goto out;
     ret = 0;
 out:
     if (no_rcu)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index f560b4902060..a1670dff0629 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
                      struct flow_dissector *flow_dissector,
                      void *target_container)
 {
+    struct flow_dissector_key_ports *key_ports = NULL;
     struct flow_dissector_key_control *key_control;
     struct flow_dissector_key_basic *key_basic;
     struct flow_dissector_key_addrs *key_addrs;
-    struct flow_dissector_key_ports *key_ports;
     struct flow_dissector_key_tags *key_tags;

     key_control = skb_flow_dissector_target(flow_dissector,
@@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
         key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
     }

-    if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+    if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
         key_ports = skb_flow_dissector_target(flow_dissector,
                               FLOW_DISSECTOR_KEY_PORTS,
                               target_container);
+    else if (dissector_uses_key(flow_dissector,
+                    FLOW_DISSECTOR_KEY_PORTS_RANGE))
+        key_ports = skb_flow_dissector_target(flow_dissector,
+                              FLOW_DISSECTOR_KEY_PORTS_RANGE,
+                              target_container);
+
+    if (key_ports) {
         key_ports->src = flow_keys->sport;
         key_ports->dst = flow_keys->dport;
     }
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 4ba90d81b6a1..b3745ed711ba 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -77,6 +77,20 @@ static const char *btf_var_linkage_str(__u32 linkage)
     }
 }

+static const char *btf_func_linkage_str(const struct btf_type *t)
+{
+    switch (btf_vlen(t)) {
+    case BTF_FUNC_STATIC:
+        return "static";
+    case BTF_FUNC_GLOBAL:
+        return "global";
+    case BTF_FUNC_EXTERN:
+        return "extern";
+    default:
+        return "(unknown)";
+    }
+}
+
 static const char *btf_str(const struct btf *btf, __u32 off)
 {
     if (!off)
@@ -231,12 +245,17 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
         printf(" fwd_kind=%s", fwd_kind);
         break;
     }
-    case BTF_KIND_FUNC:
-        if (json_output)
+    case BTF_KIND_FUNC: {
+        const char *linkage = btf_func_linkage_str(t);
+
+        if (json_output) {
             jsonw_uint_field(w, "type_id", t->type);
-        else
-            printf(" type_id=%u", t->type);
+            jsonw_string_field(w, "linkage", linkage);
+        } else {
+            printf(" type_id=%u linkage=%s", t->type, linkage);
+        }
         break;
+    }
     case BTF_KIND_FUNC_PROTO: {
         const struct btf_param *p = (const void *)(t + 1);
         __u16 vlen = BTF_INFO_VLEN(t->info);
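For BTF_KIND_FUNC the type's vlen bits double as the linkage value, which is why btf_func_linkage_str() above decodes it with btf_vlen(). A small userspace sketch using libbpf's public btf.h accessors; dump_func_linkage() and the printf formatting are invented here, while the btf__*/btf_* helpers are real libbpf API:

    #include <stdio.h>
    #include <bpf/btf.h>

    /* Print the linkage value of every BTF_KIND_FUNC in the given BTF. */
    static void dump_func_linkage(const struct btf *btf)
    {
        __u32 i, n = btf__get_nr_types(btf);

        for (i = 1; i <= n; i++) {
            const struct btf_type *t = btf__type_by_id(btf, i);

            if (!btf_is_func(t))
                continue;
            /* vlen holds BTF_FUNC_STATIC/GLOBAL/EXTERN for funcs */
            printf("%s: linkage=%u\n",
                   btf__name_by_offset(btf, t->name_off), btf_vlen(t));
        }
    }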
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index faf5418609ea..0c021352beed 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 OUTPUT := .output
-CLANG := clang
-LLC := llc
-LLVM_STRIP := llvm-strip
+CLANG ?= clang
+LLC ?= llc
+LLVM_STRIP ?= llvm-strip
 DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
 BPFTOOL ?= $(DEFAULT_BPFTOOL)
 LIBBPF_SRC := $(abspath ../../lib/bpf)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ae34b681ae82..514b1a524abb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -345,7 +345,6 @@ struct bpf_object {

     bool loaded;
     bool has_pseudo_calls;
-    bool relaxed_core_relocs;

     /*
      * Information when doing elf related work. Only valid if fd
@@ -3870,7 +3869,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
         if (strncmp(local_name, targ_name, local_essent_len) == 0) {
             pr_debug("[%d] %s: found candidate [%d] %s\n",
                  local_type_id, local_name, i, targ_name);
-            new_ids = realloc(cand_ids->data, cand_ids->len + 1);
+            new_ids = reallocarray(cand_ids->data,
+                           cand_ids->len + 1,
+                           sizeof(*cand_ids->data));
             if (!new_ids) {
                 err = -ENOMEM;
                 goto err_out;
@@ -4238,25 +4239,38 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,
  */
 static int bpf_core_reloc_insn(struct bpf_program *prog,
                    const struct bpf_field_reloc *relo,
+                   int relo_idx,
                    const struct bpf_core_spec *local_spec,
                    const struct bpf_core_spec *targ_spec)
 {
-    bool failed = false, validate = true;
     __u32 orig_val, new_val;
     struct bpf_insn *insn;
+    bool validate = true;
     int insn_idx, err;
     __u8 class;

     if (relo->insn_off % sizeof(struct bpf_insn))
         return -EINVAL;
     insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+    insn = &prog->insns[insn_idx];
+    class = BPF_CLASS(insn->code);

     if (relo->kind == BPF_FIELD_EXISTS) {
         orig_val = 1; /* can't generate EXISTS relo w/o local field */
         new_val = targ_spec ? 1 : 0;
     } else if (!targ_spec) {
-        failed = true;
-        new_val = (__u32)-1;
+        pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+             bpf_program__title(prog, false), relo_idx, insn_idx);
+        insn->code = BPF_JMP | BPF_CALL;
+        insn->dst_reg = 0;
+        insn->src_reg = 0;
+        insn->off = 0;
+        /* if this instruction is reachable (not a dead code),
+         * verifier will complain with the following message:
+         * invalid func unknown#195896080
+         */
+        insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+        return 0;
     } else {
         err = bpf_core_calc_field_relo(prog, relo, local_spec,
                            &orig_val, &validate);
@@ -4268,50 +4282,47 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
             return err;
     }

-    insn = &prog->insns[insn_idx];
-    class = BPF_CLASS(insn->code);
-
     switch (class) {
     case BPF_ALU:
     case BPF_ALU64:
         if (BPF_SRC(insn->code) != BPF_K)
             return -EINVAL;
-        if (!failed && validate && insn->imm != orig_val) {
-            pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
-                bpf_program__title(prog, false), insn_idx,
-                insn->imm, orig_val, new_val);
+        if (validate && insn->imm != orig_val) {
+            pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+                bpf_program__title(prog, false), relo_idx,
+                insn_idx, insn->imm, orig_val, new_val);
             return -EINVAL;
         }
         orig_val = insn->imm;
         insn->imm = new_val;
-        pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
-             bpf_program__title(prog, false), insn_idx,
" w/ failed reloc" : "", orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); break; case BPF_LDX: case BPF_ST: case BPF_STX: - if (!failed && validate && insn->off != orig_val) { - pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - insn->off, orig_val, new_val); + if (validate && insn->off != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->off, orig_val, new_val); return -EINVAL; } if (new_val > SHRT_MAX) { - pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n", - bpf_program__title(prog, false), insn_idx, - new_val); + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, new_val); return -ERANGE; } orig_val = insn->off; insn->off = new_val; - pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - failed ? " w/ failed reloc" : "", orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); break; default: - pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), relo_idx, insn_idx, insn->code, insn->src_reg, insn->dst_reg, insn->off, insn->imm); return -EINVAL; @@ -4510,24 +4521,33 @@ static int bpf_core_reloc_field(struct bpf_program *prog, } /* - * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is - * requested, it's expected that we might not find any candidates. - * In this case, if field wasn't found in any candidate, the list of - * candidates shouldn't change at all, we'll just handle relocating - * appropriately, depending on relo's kind. + * For BPF_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't + * change at all, we'll just handle relocating appropriately, + * depending on relo's kind. */ if (j > 0) cand_ids->len = j; - if (j == 0 && !prog->obj->relaxed_core_relocs && - relo->kind != BPF_FIELD_EXISTS) { - pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); - return -ESRCH; - } + /* + * If no candidates were found, it might be both a programmer error, + * as well as expected case, depending whether instruction w/ + * relocation is guarded in some way that makes it unreachable (dead + * code) if relocation can't be resolved. This is handled in + * bpf_core_reloc_insn() uniformly by replacing that instruction with + * BPF helper call insn (using invalid helper ID). If that instruction + * is indeed unreachable, then it will be ignored and eliminated by + * verifier. If it was an error, then verifier will complain and point + * to a specific instruction number in its log. 
+     */
+    if (j == 0)
+        pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
+             prog_name, relo_idx, local_id, local_name, spec_str);

     /* bpf_core_reloc_insn should know how to handle missing targ_spec */
-    err = bpf_core_reloc_insn(prog, relo, &local_spec,
+    err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec,
                   j ? &targ_spec : NULL);
     if (err) {
         pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
@@ -5057,7 +5077,6 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
     if (IS_ERR(obj))
         return obj;

-    obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
     kconfig = OPTS_GET(opts, kconfig, NULL);
     if (kconfig) {
         obj->kconfig = strdup(kconfig);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2a5e3b087002..3fe12c9d1f92 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -77,7 +77,11 @@ struct bpf_object_open_opts {
     const char *object_name;
     /* parse map definitions non-strictly, allowing extra attributes/data */
     bool relaxed_maps;
-    /* process CO-RE relocations non-strictly, allowing them to fail */
+    /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures.
+     * Value is ignored. Relocations always are processed non-strictly.
+     * Non-relocatable instructions are replaced with invalid ones to
+     * prevent accidental errors.
+     * */
     bool relaxed_core_relocs;
     /* maps that set the 'pinning' attribute in their definition will have
      * their pin_path attribute set to a file in this directory, and be
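With relaxed_core_relocs deprecated above, libbpf now always handles a failed CO-RE relocation by poisoning the instruction rather than aborting object load, on the expectation that programs guard such accesses so the poisoned instruction becomes dead code. A hedged BPF-C sketch of such a guard using bpf_core_field_exists() from bpf_core_read.h; the some_new_field member is hypothetical:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_core_read.h>

    /* If some_new_field does not exist on the running kernel, the
     * relocated read below is unreachable, so the verifier prunes the
     * poisoned instruction instead of rejecting the program with
     * "invalid func unknown#195896080".
     */
    static __always_inline long read_new_field(struct task_struct *task)
    {
        if (bpf_core_field_exists(task->some_new_field))
            return BPF_CORE_READ(task, some_new_field);
        return 0;
    }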
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5f41f5bd8033..257a1aaaa37d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -155,16 +155,17 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c
 DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
 BPFTOOL ?= $(DEFAULT_BPFTOOL)

-$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BUILD_DIR)/bpftool
-	$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
-		    OUTPUT=$(BUILD_DIR)/bpftool/ \
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+		    $(BPFOBJ) | $(BUILD_DIR)/bpftool
+	$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+		    OUTPUT=$(BUILD_DIR)/bpftool/ \
 		    prefix= DESTDIR=$(SCRATCH_DIR)/ install

-$(BPFOBJ): $(wildcard $(BPFDIR)/*.c $(BPFDIR)/*.h $(BPFDIR)/Makefile) \
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
 	   ../../../include/uapi/linux/bpf.h \
 	   | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
-		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers

 $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
 	$(call msg,MKDIR,,$@)
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index e1a379f5f7d2..5cc06021f27d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -9,7 +9,7 @@ void test_fentry_test(void)
     struct test_pkt_access *pkt_skel = NULL;
     struct fentry_test *fentry_skel = NULL;
     int err, pkt_fd, i;
-    __u32 duration, retval;
+    __u32 duration = 0, retval;
     __u64 *result;

     pkt_skel = test_pkt_access__open_and_load();
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index db5c74d2ce6d..cde463af7071 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -11,7 +11,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
     int err, pkt_fd, i;
     struct bpf_link **link = NULL;
     struct bpf_program **prog = NULL;
-    __u32 duration, retval;
+    __u32 duration = 0, retval;
     struct bpf_map *data_map;
     const int zero = 0;
     u64 *result = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index f99013222c74..d2c3655dd7a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -13,7 +13,7 @@ void test_fexit_test(void)
     int err, pkt_fd, kfree_skb_fd, i;
     struct bpf_link *link[6] = {};
     struct bpf_program *prog[6];
-    __u32 duration, retval;
+    __u32 duration = 0, retval;
     struct bpf_map *data_map;
     const int zero = 0;
     u64 result[6];
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 2c37ae7dc214..098bcae5f827 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -33,7 +33,7 @@
 #define REUSEPORT_ARRAY_SIZE 32

 static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
-static enum result expected_results[NR_RESULTS];
+static __u32 expected_results[NR_RESULTS];
 static int sk_fds[REUSEPORT_ARRAY_SIZE];
 static int reuseport_array = -1, outer_map = -1;
 static int select_by_skb_data_prog;
@@ -316,6 +316,26 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
            expected.len, result.len, get_linum());
 }

+static const char *result_to_str(enum result res)
+{
+    switch (res) {
+    case DROP_ERR_INNER_MAP:
+        return "DROP_ERR_INNER_MAP";
+    case DROP_ERR_SKB_DATA:
+        return "DROP_ERR_SKB_DATA";
+    case DROP_ERR_SK_SELECT_REUSEPORT:
+        return "DROP_ERR_SK_SELECT_REUSEPORT";
+    case DROP_MISC:
+        return "DROP_MISC";
+    case PASS:
+        return "PASS";
+    case PASS_ERR_SK_SELECT_REUSEPORT:
+        return "PASS_ERR_SK_SELECT_REUSEPORT";
+    default:
+        return "UNKNOWN";
+    }
+}
+
 static void check_results(void)
 {
     __u32 results[NR_RESULTS];
@@ -351,10 +371,10 @@ static void check_results(void)
             printf(", %u", expected_results[i]);
         printf("]\n");

-        RET_IF(expected_results[broken] != results[broken],
-               "unexpected result",
-               "expected_results[%u] != results[%u] bpf_prog_linum:%ld\n",
-               broken, broken, get_linum());
+        printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken),
+               get_linum());
+
+        CHECK_FAIL(true);
 }

 static int send_data(int type, sa_family_t family, void *data, size_t len,
@@ -677,7 +697,19 @@ static void setup_per_test(int type, sa_family_t family, bool inany,

 static void cleanup_per_test(bool no_inner_map)
 {
-    int i, err;
+    int i, err, zero = 0;
+
+    memset(expected_results, 0, sizeof(expected_results));
+
+    for (i = 0; i < NR_RESULTS; i++) {
+        err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
+        RET_IF(err, "reset elem in result_map",
+               "i:%u err:%d errno:%d\n", i, err, errno);
+    }
+
+    err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
+    RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n",
+           err, errno);

     for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
         close(sk_fds[i]);
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
new file mode 100644
index 000000000000..1235f3d1cc50
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/prctl.h>
+#include <test_progs.h>
+
+#define MAX_TRAMP_PROGS 40
+
+struct inst {
+    struct bpf_object *obj;
+    struct bpf_link *link_fentry;
+    struct bpf_link *link_fexit;
+};
+
+static int test_task_rename(void)
+{
+    int fd, duration = 0, err;
+    char buf[] = "test_overhead";
+
+    fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+    if (CHECK(fd < 0, "open /proc", "err %d", errno))
+        return -1;
+    err = write(fd, buf, sizeof(buf));
+    if (err < 0) {
+        CHECK(err < 0, "task rename", "err %d", errno);
+        close(fd);
+        return -1;
+    }
+    close(fd);
+    return 0;
+}
+
+static struct bpf_link *load(struct bpf_object *obj, const char *name)
+{
+    struct bpf_program *prog;
+    int duration = 0;
+
+    prog = bpf_object__find_program_by_title(obj, name);
+    if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
+        return ERR_PTR(-EINVAL);
+    return bpf_program__attach_trace(prog);
+}
+
+void test_trampoline_count(void)
+{
+    const char *fentry_name = "fentry/__set_task_comm";
+    const char *fexit_name = "fexit/__set_task_comm";
+    const char *object = "test_trampoline_count.o";
+    struct inst inst[MAX_TRAMP_PROGS] = { 0 };
+    int err, i = 0, duration = 0;
+    struct bpf_object *obj;
+    struct bpf_link *link;
+    char comm[16] = {};
+
+    /* attach 'allowed' 40 trampoline programs */
+    for (i = 0; i < MAX_TRAMP_PROGS; i++) {
+        obj = bpf_object__open_file(object, NULL);
+        if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+            goto cleanup;
+
+        err = bpf_object__load(obj);
+        if (CHECK(err, "obj_load", "err %d\n", err))
+            goto cleanup;
+        inst[i].obj = obj;
+
+        if (rand() % 2) {
+            link = load(obj, fentry_name);
+            if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+                goto cleanup;
+            inst[i].link_fentry = link;
+        } else {
+            link = load(obj, fexit_name);
+            if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+                goto cleanup;
+            inst[i].link_fexit = link;
+        }
+    }
+
+    /* and try 1 extra.. */
+    obj = bpf_object__open_file(object, NULL);
+    if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+        goto cleanup;
+
+    err = bpf_object__load(obj);
+    if (CHECK(err, "obj_load", "err %d\n", err))
+        goto cleanup_extra;
+
+    /* ..that needs to fail */
+    link = load(obj, fentry_name);
+    if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+        bpf_link__destroy(link);
+        goto cleanup_extra;
+    }
+
+    /* with E2BIG error */
+    CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+
+    /* and finaly execute the probe */
+    if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
+        goto cleanup_extra;
+    CHECK_FAIL(test_task_rename());
+    CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+
+cleanup_extra:
+    bpf_object__close(obj);
+cleanup:
+    while (--i) {
+        bpf_link__destroy(inst[i].link_fentry);
+        bpf_link__destroy(inst[i].link_fexit);
+        bpf_object__close(inst[i].obj);
+    }
+}
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index d69a1f2bbbfd..26e77dcc7e91 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
     data_check.skb_ports[0] = th->source;
     data_check.skb_ports[1] = th->dest;

+    if (th->fin)
+        /* The connection is being torn down at the end of a
+         * test. It can't contain a cmd, so return early.
+         */
+        return SK_PASS;
+
     if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
         GOTO_DONE(DROP_ERR_SKB_DATA);
     if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
new file mode 100644
index 000000000000..e51e6e3a81c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include "bpf_trace_helpers.h"
+
+struct task_struct;
+
+SEC("fentry/__set_task_comm")
+int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+{
+    return 0;
+}
+
+SEC("fexit/__set_task_comm")
+int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+{
+    return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index a8485ae103d1..174b72a64a4c 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -139,6 +139,20 @@ echo "Testing IPv4 + GRE..."

 tc filter del dev lo ingress pref 1337

+echo "Testing port range..."
+# Drops all IP/UDP packets coming from port 8-10
+tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
+	udp src_port 8-10 action drop
+
+# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
+./test_flow_dissector -i 4 -f 7
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -f 9 -F
+# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
+./test_flow_dissector -i 4 -f 11
+
+tc filter del dev lo ingress pref 1337
+
 echo "Testing IPv6..."
 # Drops all IPv6/UDP packets coming from port 9
 tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 4a851513c842..779e11da979c 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
     FILE *file;
     int i, fp;

-    file = fopen(".sendpage_tst.tmp", "w+");
+    file = tmpfile();
     if (!file) {
         perror("create file for sendpage");
         return 1;
@@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
         fwrite(&k, sizeof(char), 1, file);
     fflush(file);
     fseek(file, 0, SEEK_SET);
-    fclose(file);

-    fp = open(".sendpage_tst.tmp", O_RDONLY);
-    if (fp < 0) {
-        perror("reopen file for sendpage");
-        return 1;
-    }
+    fp = fileno(file);

     clock_gettime(CLOCK_MONOTONIC, &s->start);
     for (i = 0; i < cnt; i++) {
@@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,

         if (!drop && sent < 0) {
             perror("send loop error");
-            close(fp);
+            fclose(file);
             return sent;
         } else if (drop && sent >= 0) {
             printf("sendpage loop error expected: %i\n", sent);
-            close(fp);
+            fclose(file);
             return -EIO;
         }

@@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
         s->bytes_sent += sent;
     }
     clock_gettime(CLOCK_MONOTONIC, &s->end);
-    close(fp);
+    fclose(file);

     return 0;
 }
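The test_sockmap.c change above replaces a named scratch file plus a second open() with tmpfile()/fileno(), which avoids leaving a stale .sendpage_tst.tmp behind and removes a failure path. The pattern in isolation, as a small standalone C sketch (make_scratch_fd() is an invented name):

    #include <stdio.h>

    /* Anonymous temp file reachable both as a FILE * and as an fd. */
    static int make_scratch_fd(FILE **filep)
    {
        FILE *f = tmpfile();    /* unlinked automatically on fclose()/exit */

        if (!f)
            return -1;
        fputc('x', f);
        fflush(f);
        fseek(f, 0, SEEK_SET);
        *filep = f;
        return fileno(f);       /* shares the open file description */
    }

Note that closing the FILE with fclose() also invalidates the returned descriptor, which is why the patch converts every close(fp) on the error paths into fclose(file).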