author     Linus Torvalds <torvalds@linux-foundation.org>   2021-11-02 06:20:58 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2021-11-02 06:20:58 -0700
commit     fc02cb2b37fe2cbf1d3334b9f0f0eab9431766c4 (patch)
tree       93b16bc48fdc3be4a1adccbf4c7de92a5e8440e1 /kernel
parent     bfc484fe6abba4b89ec9330e0e68778e2a9856b2 (diff)
parent     84882cf72cd774cf16fd338bdbf00f69ac9f9194 (diff)
download   linux-fc02cb2b37fe2cbf1d3334b9f0f0eab9431766c4.tar.bz2
Merge tag 'net-next-for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next

Pull networking updates from Jakub Kicinski:
 "Core:

   - Remove socket skb caches

   - Add a SO_RESERVE_MEM socket op to forward allocate buffer space
     and avoid memory accounting overhead on each message sent

   - Introduce managed neighbor entries - added by control plane and
     resolved by the kernel for use in acceleration paths (BPF / XDP
     right now, HW offload users will benefit as well)

   - Make neighbor eviction on link down controllable by userspace to
     work around WiFi networks with bad roaming implementations

   - vrf: Rework interaction with netfilter/conntrack

   - fq_codel: implement L4S style ce_threshold_ect1 marking

   - sch: Eliminate unnecessary RCU waits in mini_qdisc_pair_swap()

  BPF:

   - Add support for new btf kind BTF_KIND_TAG, arbitrary type tagging
     as implemented in LLVM14

   - Introduce bpf_get_branch_snapshot() to capture Last Branch Records

   - Implement variadic trace_printk helper

   - Add a new Bloom filter map type

   - Track <8-byte scalar spill and refill

   - Access hw timestamp through BPF's __sk_buff

   - Disallow unprivileged BPF by default

   - Document BPF licensing

  Netfilter:

   - Introduce egress hook for looking at raw outgoing packets

   - Allow matching on and modifying inner headers / payload data

   - Add NFT_META_IFTYPE to match on the interface type either from
     ingress or egress

  Protocols:

   - Multi-Path TCP:
      - increase default max additional subflows to 2
      - rework forward memory allocation
      - add getsockopts: MPTCP_INFO, MPTCP_TCPINFO, MPTCP_SUBFLOW_ADDRS

   - MCTP flow support allowing lower layer drivers to configure msg
     muxing as needed

   - Automatic Multicast Tunneling (AMT) driver based on RFC7450

   - HSR: support the RedBox supervision frames (IEC-62439-3:2018)

   - Support for the ip6ip6 encapsulation of IOAM

   - Netlink interface for CAN-FD's Transmitter Delay Compensation

   - Support SMC-Rv2 eliminating the current same-subnet restriction,
     by exploiting the UDP encapsulation feature of RoCE adapters

   - TLS: add SM4 GCM/CCM crypto support

   - Bluetooth: initial support for link quality and audio/codec
     offload

  Driver APIs:

   - Add a batched interface for RX buffer allocation in AF_XDP buffer
     pool

   - ethtool: Add ability to control transceiver modules' power mode

   - phy: Introduce supported interfaces bitmap to express MAC
     capabilities and simplify PHY code

   - Drop rtnl_lock from DSA .port_fdb_{add,del} callbacks

  New drivers:

   - WiFi driver for Realtek 8852AE 802.11ax devices (rtw89)

   - Ethernet driver for ASIX AX88796C SPI device (ax88796c)

  Drivers:

   - Broadcom PHYs:
      - support 72165, 7712 16nm PHYs
      - support IDDQ-SR for additional power savings

   - PHY support for QCA8081, QCA9561 PHYs

   - NXP DPAA2: support for IRQ coalescing

   - NXP Ethernet (enetc): support for software TCP segmentation

   - Renesas Ethernet (ravb):
      - support DMAC and EMAC blocks of Gigabit-capable IP found on
        RZ/G2L SoC

   - Intel 100G Ethernet:
      - support for eswitch offload of TC/OvS flow API, including
        offload of GRE, VxLAN, Geneve tunneling
      - support application device queues - ability to assign Rx and
        Tx queues to application threads
      - PTP and PPS (pulse-per-second) extensions

   - Broadcom Ethernet (bnxt):
      - devlink health reporting and device reload extensions

   - Mellanox Ethernet (mlx5):
      - offload macvlan interfaces
      - support HW offload of TC rules involving OVS internal ports
      - support HW-GRO and header/data split
      - support application device queues

   - Marvell OcteonTx2:
      - add XDP support for PF
      - add PTP support for VF

   - Qualcomm Ethernet switch (qca8k): support for QCA8328

   - Realtek Ethernet DSA switch (rtl8366rb):
      - support bridge offload
      - support STP, fast aging, disabling address learning
      - support for Realtek RTL8365MB-VC, a 4+1 port 10M/100M/1GE
        switch

   - Mellanox Ethernet/IB switch (mlxsw):
      - multi-level qdisc hierarchy offload (e.g. RED, prio and
        shaping)
      - offload root TBF qdisc as port shaper
      - support multiple routing interface MAC address prefixes
      - support for IP-in-IP with IPv6 underlay

   - MediaTek WiFi (mt76):
      - mt7921 - ASPM, 6GHz, SDIO and testmode support
      - mt7915 - LED and TWT support

   - Qualcomm WiFi (ath11k):
      - include channel rx and tx time in survey dump statistics
      - support for 80P80 and 160 MHz bandwidths
      - support channel 2 in 6 GHz band
      - spectral scan support for QCN9074
      - support for rx decapsulation offload (data frames in 802.3
        format)

   - Qualcomm phone SoC WiFi (wcn36xx):
      - enable Idle Mode Power Save (IMPS) to reduce power consumption
        during idle

   - Bluetooth driver support for MediaTek MT7922 and MT7921

   - Enable support for AOSP Bluetooth extension in Qualcomm WCN399x
     and Realtek 8822C/8852A

   - Microsoft vNIC driver (mana):
      - support hibernation and kexec

   - Google vNIC driver (gve):
      - support for jumbo frames
      - implement Rx page reuse

  Refactor:

   - Make all writes to netdev->dev_addr go thru helpers, so that we
     can add this address to the address rbtree and handle the updates

   - Various TCP cleanups and optimizations including improvements to
     CPU cache use

   - Simplify the gnet_stats, Qdisc stats' handling and remove
     qdisc->running sequence counter

   - Driver changes and API updates to address devlink locking
     deficiencies"

* tag 'net-next-for-5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2122 commits)
  Revert "net: avoid double accounting for pure zerocopy skbs"
  selftests: net: add arp_ndisc_evict_nocarrier
  net: ndisc: introduce ndisc_evict_nocarrier sysctl parameter
  net: arp: introduce arp_evict_nocarrier sysctl parameter
  libbpf: Deprecate AF_XDP support
  kbuild: Unify options for BTF generation for vmlinux and modules
  selftests/bpf: Add a testcase for 64-bit bounds propagation issue.
  bpf: Fix propagation of signed bounds from 64-bit min/max into 32-bit.
  bpf: Fix propagation of bounds from 64-bit min/max into 32-bit and var_off.
  net: vmxnet3: remove multiple false checks in vmxnet3_ethtool.c
  net: avoid double accounting for pure zerocopy skbs
  tcp: rename sk_wmem_free_skb
  netdevsim: fix uninit value in nsim_drv_configure_vfs()
  selftests/bpf: Fix also no-alu32 strobemeta selftest
  bpf: Add missing map_delete_elem method to bloom filter map
  selftests/bpf: Add bloom map success test for userspace calls
  bpf: Add alignment padding for "map_extra" + consolidate holes
  bpf: Bloom filter map naming fixups
  selftests/bpf: Add test cases for struct_ops prog
  bpf: Add dummy BPF STRUCT_OPS for test purpose
  ...
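
As an aside on the "Core" list above: the new SO_RESERVE_MEM socket option can be exercised from userspace roughly as follows. A minimal sketch, assuming the option value 73 from the 5.16 asm-generic socket headers; on older libc headers the constant must be supplied by hand, and the kernel may refuse the request outside a memory cgroup:

        #include <stdio.h>
        #include <sys/socket.h>

        #ifndef SO_RESERVE_MEM
        #define SO_RESERVE_MEM 73       /* assumed value, check asm-generic/socket.h */
        #endif

        int main(void)
        {
                int fd = socket(AF_INET, SOCK_STREAM, 0);
                int bytes = 1 << 20;    /* forward-allocate ~1 MiB of buffer space */

                if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_RESERVE_MEM,
                                         &bytes, sizeof(bytes)))
                        perror("SO_RESERVE_MEM");       /* advisory in this sketch */
                return 0;
        }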
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/Kconfig                     |   7
-rw-r--r--  kernel/bpf/Makefile                    |   2
-rw-r--r--  kernel/bpf/arraymap.c                  |   7
-rw-r--r--  kernel/bpf/bloom_filter.c              | 204
-rw-r--r--  kernel/bpf/bpf_struct_ops.c            |  32
-rw-r--r--  kernel/bpf/bpf_struct_ops_types.h      |   3
-rw-r--r--  kernel/bpf/btf.c                       | 183
-rw-r--r--  kernel/bpf/core.c                      |   9
-rw-r--r--  kernel/bpf/hashtab.c                   |  13
-rw-r--r--  kernel/bpf/helpers.c                   |  11
-rw-r--r--  kernel/bpf/preload/.gitignore          |   4
-rw-r--r--  kernel/bpf/preload/Makefile            |  26
-rw-r--r--  kernel/bpf/preload/iterators/Makefile  |  38
-rw-r--r--  kernel/bpf/syscall.c                   |  77
-rw-r--r--  kernel/bpf/trampoline.c                |  15
-rw-r--r--  kernel/bpf/verifier.c                  | 373
-rw-r--r--  kernel/events/core.c                   |   2
-rw-r--r--  kernel/trace/bpf_trace.c               | 102
18 files changed, 948 insertions, 160 deletions
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index a82d6de86522..d24d518ddd63 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -64,6 +64,7 @@ config BPF_JIT_DEFAULT_ON
config BPF_UNPRIV_DEFAULT_OFF
bool "Disable unprivileged BPF by default"
+ default y
depends on BPF_SYSCALL
help
Disables unprivileged BPF by default by setting the corresponding
@@ -72,6 +73,12 @@ config BPF_UNPRIV_DEFAULT_OFF
disable it by setting it to 1 (from which no other transition to
0 is possible anymore).
+ Unprivileged BPF could be used to exploit certain potential
+ speculative execution side-channel vulnerabilities on unmitigated
+ affected hardware.
+
+ If you are unsure how to answer this question, answer Y.
+
source "kernel/bpf/preload/Kconfig"
config BPF_LSM
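
The "default y" flip above only changes the boot-time value of the long-standing kernel.unprivileged_bpf_disabled sysctl. A minimal sketch for inspecting it at runtime; the value semantics in the comment follow the Kconfig help text, and the assumed boot default should be verified against Documentation/admin-guide/sysctl/kernel.rst:

        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/proc/sys/kernel/unprivileged_bpf_disabled", "r");
                int v;

                if (!f || fscanf(f, "%d", &v) != 1)
                        return 1;
                fclose(f);
                /* 0 - unprivileged BPF enabled, 1 - disabled (no transition back),
                 * 2 - disabled but an admin can still flip it (assumed boot
                 *     default with the option above)
                 */
                printf("unprivileged_bpf_disabled = %d\n", v);
                return 0;
        }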
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7f33098ca63f..cf6ca339f3cd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -7,7 +7,7 @@ endif
CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 447def540544..c7a5be3bf8be 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -645,7 +645,7 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
-static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
+static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
void *callback_ctx, u64 flags)
{
u32 i, key, num_elems = 0;
@@ -668,9 +668,8 @@ static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
val = array->value + array->elem_size * i;
num_elems++;
key = i;
- ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
- (u64)(long)&key, (u64)(long)val,
- (u64)(long)callback_ctx, 0);
+ ret = callback_fn((u64)(long)map, (u64)(long)&key,
+ (u64)(long)val, (u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret)
break;
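
The two hunks above drop the untyped BPF_CAST_CALL() cast dance in favor of a real function-pointer type. A self-contained userspace sketch of the idea; the five-u64 prototype mirrors the bpf_callback_t typedef this series adds to include/linux/bpf.h:

        #include <stdint.h>

        typedef uint64_t u64;

        /* one prototype fits every map-iteration / timer callback */
        typedef u64 (*bpf_callback_t)(u64 map, u64 key, u64 value, u64 ctx, u64 flags);

        static u64 count_cb(u64 map, u64 key, u64 value, u64 ctx, u64 flags)
        {
                (*(u64 *)(uintptr_t)ctx)++;     /* toy body: count invocations */
                return 0;                       /* 0 - continue, 1 - stop */
        }

        int main(void)
        {
                u64 n = 0;
                bpf_callback_t cb = count_cb;   /* no cast needed, unlike BPF_CAST_CALL */

                cb(0, 0, 0, (u64)(uintptr_t)&n, 0);
                return n == 1 ? 0 : 1;
        }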
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
new file mode 100644
index 000000000000..277a05e9c984
--- /dev/null
+++ b/kernel/bpf/bloom_filter.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bitmap.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#define BLOOM_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK)
+
+struct bpf_bloom_filter {
+ struct bpf_map map;
+ u32 bitset_mask;
+ u32 hash_seed;
+ /* If the size of the values in the bloom filter is u32 aligned,
+ * then it is more performant to use jhash2 as the underlying hash
+ * function, else we use jhash. This tracks the number of u32s
+ * in an u32-aligned value size. If the value size is not u32 aligned,
+ * this will be 0.
+ */
+ u32 aligned_u32_count;
+ u32 nr_hash_funcs;
+ unsigned long bitset[];
+};
+
+static u32 hash(struct bpf_bloom_filter *bloom, void *value,
+ u32 value_size, u32 index)
+{
+ u32 h;
+
+ if (bloom->aligned_u32_count)
+ h = jhash2(value, bloom->aligned_u32_count,
+ bloom->hash_seed + index);
+ else
+ h = jhash(value, value_size, bloom->hash_seed + index);
+
+ return h & bloom->bitset_mask;
+}
+
+static int bloom_map_peek_elem(struct bpf_map *map, void *value)
+{
+ struct bpf_bloom_filter *bloom =
+ container_of(map, struct bpf_bloom_filter, map);
+ u32 i, h;
+
+ for (i = 0; i < bloom->nr_hash_funcs; i++) {
+ h = hash(bloom, value, map->value_size, i);
+ if (!test_bit(h, bloom->bitset))
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags)
+{
+ struct bpf_bloom_filter *bloom =
+ container_of(map, struct bpf_bloom_filter, map);
+ u32 i, h;
+
+ if (flags != BPF_ANY)
+ return -EINVAL;
+
+ for (i = 0; i < bloom->nr_hash_funcs; i++) {
+ h = hash(bloom, value, map->value_size, i);
+ set_bit(h, bloom->bitset);
+ }
+
+ return 0;
+}
+
+static int bloom_map_pop_elem(struct bpf_map *map, void *value)
+{
+ return -EOPNOTSUPP;
+}
+
+static int bloom_map_delete_elem(struct bpf_map *map, void *value)
+{
+ return -EOPNOTSUPP;
+}
+
+static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
+{
+ u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
+ int numa_node = bpf_map_attr_numa_node(attr);
+ struct bpf_bloom_filter *bloom;
+
+ if (!bpf_capable())
+ return ERR_PTR(-EPERM);
+
+ if (attr->key_size != 0 || attr->value_size == 0 ||
+ attr->max_entries == 0 ||
+ attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
+ !bpf_map_flags_access_ok(attr->map_flags) ||
+ /* The lower 4 bits of map_extra (0xF) specify the number
+ * of hash functions
+ */
+ (attr->map_extra & ~0xF))
+ return ERR_PTR(-EINVAL);
+
+ nr_hash_funcs = attr->map_extra;
+ if (nr_hash_funcs == 0)
+ /* Default to using 5 hash functions if unspecified */
+ nr_hash_funcs = 5;
+
+ /* For the bloom filter, the optimal bit array size that minimizes the
+ * false positive probability is n * k / ln(2) where n is the number of
+ * expected entries in the bloom filter and k is the number of hash
+ * functions. We use 7 / 5 to approximate 1 / ln(2).
+ *
+ * We round this up to the nearest power of two to enable more efficient
+ * hashing using bitmasks. The bitmask will be the bit array size - 1.
+ *
+ * If this overflows a u32, the bit array size will have 2^32 (4
+ * GB) bits.
+ */
+ if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) ||
+ check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) ||
+ nr_bits > (1UL << 31)) {
+ /* The bit array size is 2^32 bits but to avoid overflowing the
+ * u32, we use U32_MAX, which will round up to the equivalent
+ * number of bytes
+ */
+ bitset_bytes = BITS_TO_BYTES(U32_MAX);
+ bitset_mask = U32_MAX;
+ } else {
+ if (nr_bits <= BITS_PER_LONG)
+ nr_bits = BITS_PER_LONG;
+ else
+ nr_bits = roundup_pow_of_two(nr_bits);
+ bitset_bytes = BITS_TO_BYTES(nr_bits);
+ bitset_mask = nr_bits - 1;
+ }
+
+ bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
+ bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node);
+
+ if (!bloom)
+ return ERR_PTR(-ENOMEM);
+
+ bpf_map_init_from_attr(&bloom->map, attr);
+
+ bloom->nr_hash_funcs = nr_hash_funcs;
+ bloom->bitset_mask = bitset_mask;
+
+ /* Check whether the value size is u32-aligned */
+ if ((attr->value_size & (sizeof(u32) - 1)) == 0)
+ bloom->aligned_u32_count =
+ attr->value_size / sizeof(u32);
+
+ if (!(attr->map_flags & BPF_F_ZERO_SEED))
+ bloom->hash_seed = get_random_int();
+
+ return &bloom->map;
+}
+
+static void bloom_map_free(struct bpf_map *map)
+{
+ struct bpf_bloom_filter *bloom =
+ container_of(map, struct bpf_bloom_filter, map);
+
+ bpf_map_area_free(bloom);
+}
+
+static void *bloom_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ /* The eBPF program should use map_peek_elem instead */
+ return ERR_PTR(-EINVAL);
+}
+
+static int bloom_map_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 flags)
+{
+ /* The eBPF program should use map_push_elem instead */
+ return -EINVAL;
+}
+
+static int bloom_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type)
+{
+ /* Bloom filter maps are keyless */
+ return btf_type_is_void(key_type) ? 0 : -EINVAL;
+}
+
+static int bpf_bloom_map_btf_id;
+const struct bpf_map_ops bloom_filter_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc = bloom_map_alloc,
+ .map_free = bloom_map_free,
+ .map_push_elem = bloom_map_push_elem,
+ .map_peek_elem = bloom_map_peek_elem,
+ .map_pop_elem = bloom_map_pop_elem,
+ .map_lookup_elem = bloom_map_lookup_elem,
+ .map_update_elem = bloom_map_update_elem,
+ .map_delete_elem = bloom_map_delete_elem,
+ .map_check_btf = bloom_map_check_btf,
+ .map_btf_name = "bpf_bloom_filter",
+ .map_btf_id = &bpf_bloom_map_btf_id,
+};
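
From userspace the new map type is driven through the existing syscall commands, matching the ops table above: update pushes, lookup peeks. A minimal sketch using the raw bpf(2) syscall, assuming 5.16+ uapi headers that know BPF_MAP_TYPE_BLOOM_FILTER and the map_extra attribute; creation needs bpf_capable() privileges per the alloc path above:

        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/bpf.h>

        static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr)
        {
                return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
        }

        int main(void)
        {
                union bpf_attr attr;
                __u32 value = 42;
                int fd;

                memset(&attr, 0, sizeof(attr));
                attr.map_type = BPF_MAP_TYPE_BLOOM_FILTER;
                attr.key_size = 0;              /* bloom filter maps are keyless */
                attr.value_size = sizeof(value);
                attr.max_entries = 1000;        /* expected number of entries */
                attr.map_extra = 3;             /* lower 4 bits: nr of hash functions */
                fd = sys_bpf(BPF_MAP_CREATE, &attr);
                if (fd < 0)
                        return 1;

                /* adding goes through UPDATE (no key, flags must be BPF_ANY) ... */
                memset(&attr, 0, sizeof(attr));
                attr.map_fd = fd;
                attr.value = (__u64)(unsigned long)&value;
                attr.flags = BPF_ANY;
                if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr))
                        return 1;

                /* ... querying through LOOKUP: 0 means "maybe present",
                 * -1 with ENOENT means "definitely absent"
                 */
                memset(&attr, 0, sizeof(attr));
                attr.map_fd = fd;
                attr.value = (__u64)(unsigned long)&value;
                printf("membership: %ld\n", sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr));
                return 0;
        }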
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 9abcc33f02cf..8ecfe4752769 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -93,6 +93,9 @@ const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};
const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
+#ifdef CONFIG_NET
+ .test_run = bpf_struct_ops_test_run,
+#endif
};
static const struct btf_type *module_type;
@@ -312,6 +315,20 @@ static int check_zero_holes(const struct btf_type *t, void *data)
return 0;
}
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs,
+ struct bpf_prog *prog,
+ const struct btf_func_model *model,
+ void *image, void *image_end)
+{
+ u32 flags;
+
+ tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
+ tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+ flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
+ return arch_prepare_bpf_trampoline(NULL, image, image_end,
+ model, flags, tprogs, NULL);
+}
+
static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
@@ -323,7 +340,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
struct bpf_tramp_progs *tprogs = NULL;
void *udata, *kdata;
int prog_fd, err = 0;
- void *image;
+ void *image, *image_end;
u32 i;
if (flags)
@@ -363,12 +380,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
udata = &uvalue->data;
kdata = &kvalue->data;
image = st_map->image;
+ image_end = st_map->image + PAGE_SIZE;
for_each_member(i, t, member) {
const struct btf_type *mtype, *ptype;
struct bpf_prog *prog;
u32 moff;
- u32 flags;
moff = btf_member_bit_offset(t, member) / 8;
ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -430,14 +447,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
goto reset_unlock;
}
- tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
- tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
- flags = st_ops->func_models[i].ret_size > 0 ?
- BPF_TRAMP_F_RET_FENTRY_RET : 0;
- err = arch_prepare_bpf_trampoline(NULL, image,
- st_map->image + PAGE_SIZE,
- &st_ops->func_models[i],
- flags, tprogs, NULL);
+ err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+ &st_ops->func_models[i],
+ image, image_end);
if (err < 0)
goto reset_unlock;
diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 066d83ea1c99..5678a9ddf817 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -2,6 +2,9 @@
/* internal file - do not include directly */
#ifdef CONFIG_BPF_JIT
+#ifdef CONFIG_NET
+BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
+#endif
#ifdef CONFIG_INET
#include <net/tcp.h>
BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index dfe61df4f974..dbc3ad07e21b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -281,6 +281,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
+ [BTF_KIND_DECL_TAG] = "DECL_TAG",
};
const char *btf_type_str(const struct btf_type *t)
@@ -459,6 +460,17 @@ static bool btf_type_is_datasec(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
}
+static bool btf_type_is_decl_tag(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
+}
+
+static bool btf_type_is_decl_tag_target(const struct btf_type *t)
+{
+ return btf_type_is_func(t) || btf_type_is_struct(t) ||
+ btf_type_is_var(t) || btf_type_is_typedef(t);
+}
+
u32 btf_nr_types(const struct btf *btf)
{
u32 total = 0;
@@ -537,6 +549,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
static bool btf_type_is_resolve_source_only(const struct btf_type *t)
{
return btf_type_is_var(t) ||
+ btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
@@ -563,6 +576,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t)
btf_type_is_struct(t) ||
btf_type_is_array(t) ||
btf_type_is_var(t) ||
+ btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
@@ -616,6 +630,11 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
return (const struct btf_var *)(t + 1);
}
+static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t)
+{
+ return (const struct btf_decl_tag *)(t + 1);
+}
+
static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
return kind_ops[BTF_INFO_KIND(t->info)];
@@ -3801,6 +3820,110 @@ static const struct btf_kind_operations float_ops = {
.show = btf_df_show,
};
+static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
+ const struct btf_type *t,
+ u32 meta_left)
+{
+ const struct btf_decl_tag *tag;
+ u32 meta_needed = sizeof(*tag);
+ s32 component_idx;
+ const char *value;
+
+ if (meta_left < meta_needed) {
+ btf_verifier_log_basic(env, t,
+ "meta_left:%u meta_needed:%u",
+ meta_left, meta_needed);
+ return -EINVAL;
+ }
+
+ value = btf_name_by_offset(env->btf, t->name_off);
+ if (!value || !value[0]) {
+ btf_verifier_log_type(env, t, "Invalid value");
+ return -EINVAL;
+ }
+
+ if (btf_type_vlen(t)) {
+ btf_verifier_log_type(env, t, "vlen != 0");
+ return -EINVAL;
+ }
+
+ if (btf_type_kflag(t)) {
+ btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+ return -EINVAL;
+ }
+
+ component_idx = btf_type_decl_tag(t)->component_idx;
+ if (component_idx < -1) {
+ btf_verifier_log_type(env, t, "Invalid component_idx");
+ return -EINVAL;
+ }
+
+ btf_verifier_log_type(env, t, NULL);
+
+ return meta_needed;
+}
+
+static int btf_decl_tag_resolve(struct btf_verifier_env *env,
+ const struct resolve_vertex *v)
+{
+ const struct btf_type *next_type;
+ const struct btf_type *t = v->t;
+ u32 next_type_id = t->type;
+ struct btf *btf = env->btf;
+ s32 component_idx;
+ u32 vlen;
+
+ next_type = btf_type_by_id(btf, next_type_id);
+ if (!next_type || !btf_type_is_decl_tag_target(next_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid type_id");
+ return -EINVAL;
+ }
+
+ if (!env_type_is_resolve_sink(env, next_type) &&
+ !env_type_is_resolved(env, next_type_id))
+ return env_stack_push(env, next_type, next_type_id);
+
+ component_idx = btf_type_decl_tag(t)->component_idx;
+ if (component_idx != -1) {
+ if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) {
+ btf_verifier_log_type(env, v->t, "Invalid component_idx");
+ return -EINVAL;
+ }
+
+ if (btf_type_is_struct(next_type)) {
+ vlen = btf_type_vlen(next_type);
+ } else {
+ /* next_type should be a function */
+ next_type = btf_type_by_id(btf, next_type->type);
+ vlen = btf_type_vlen(next_type);
+ }
+
+ if ((u32)component_idx >= vlen) {
+ btf_verifier_log_type(env, v->t, "Invalid component_idx");
+ return -EINVAL;
+ }
+ }
+
+ env_stack_pop_resolved(env, next_type_id, 0);
+
+ return 0;
+}
+
+static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t)
+{
+ btf_verifier_log(env, "type=%u component_idx=%d", t->type,
+ btf_type_decl_tag(t)->component_idx);
+}
+
+static const struct btf_kind_operations decl_tag_ops = {
+ .check_meta = btf_decl_tag_check_meta,
+ .resolve = btf_decl_tag_resolve,
+ .check_member = btf_df_check_member,
+ .check_kflag_member = btf_df_check_kflag_member,
+ .log_details = btf_decl_tag_log,
+ .show = btf_df_show,
+};
+
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
@@ -3935,6 +4058,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_VAR] = &var_ops,
[BTF_KIND_DATASEC] = &datasec_ops,
[BTF_KIND_FLOAT] = &float_ops,
+ [BTF_KIND_DECL_TAG] = &decl_tag_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -4019,6 +4143,10 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
return !btf_resolved_type_id(btf, type_id) &&
!btf_resolved_type_size(btf, type_id);
+ if (btf_type_is_decl_tag(t))
+ return btf_resolved_type_id(btf, type_id) &&
+ !btf_resolved_type_size(btf, type_id);
+
if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
btf_type_is_var(t)) {
t = btf_type_id_resolve(btf, &type_id);
@@ -6215,3 +6343,58 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
};
BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
+
+/* BTF ID set registration API for modules */
+
+struct kfunc_btf_id_list {
+ struct list_head list;
+ struct mutex mutex;
+};
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+
+void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+ struct kfunc_btf_id_set *s)
+{
+ mutex_lock(&l->mutex);
+ list_add(&s->list, &l->list);
+ mutex_unlock(&l->mutex);
+}
+EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
+
+void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+ struct kfunc_btf_id_set *s)
+{
+ mutex_lock(&l->mutex);
+ list_del_init(&s->list);
+ mutex_unlock(&l->mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
+
+bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
+ struct module *owner)
+{
+ struct kfunc_btf_id_set *s;
+
+ if (!owner)
+ return false;
+ mutex_lock(&klist->mutex);
+ list_for_each_entry(s, &klist->list, list) {
+ if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
+ mutex_unlock(&klist->mutex);
+ return true;
+ }
+ }
+ mutex_unlock(&klist->mutex);
+ return false;
+}
+
+#endif
+
+#define DEFINE_KFUNC_BTF_ID_LIST(name) \
+ struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \
+ __MUTEX_INITIALIZER(name.mutex) }; \
+ EXPORT_SYMBOL_GPL(name)
+
+DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
+DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
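
The DECL_TAG records that the verifier code above validates typically originate from clang 14's btf_decl_tag attribute. A sketch of how such tags are attached in C source when building with BTF emission; the tag strings are arbitrary examples, not kernel-defined names:

        #define __tag(x) __attribute__((btf_decl_tag(x)))

        struct pkt_ctx {
                void *data __tag("user_ptr");   /* tagged member: component_idx 0 */
                int len;
        };

        int process(struct pkt_ctx *ctx) __tag("exported");     /* tagged function */

        int process(struct pkt_ctx *ctx)
        {
                return ctx ? ctx->len : 0;
        }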
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 6e3ae90ad107..327e3996eadb 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -32,6 +32,7 @@
#include <linux/perf_event.h>
#include <linux/extable.h>
#include <linux/log2.h>
+#include <linux/bpf_verifier.h>
#include <asm/barrier.h>
#include <asm/unaligned.h>
@@ -2263,6 +2264,9 @@ static void bpf_prog_free_deferred(struct work_struct *work)
int i;
aux = container_of(work, struct bpf_prog_aux, work);
+#ifdef CONFIG_BPF_SYSCALL
+ bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
+#endif
bpf_free_used_maps(aux);
bpf_free_used_btfs(aux);
if (bpf_prog_is_dev_bound(aux))
@@ -2365,6 +2369,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
return NULL;
}
+const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
+{
+ return NULL;
+}
+
u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 32471ba02708..d29af9988f37 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -668,7 +668,7 @@ static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
- *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
+ *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
@@ -709,7 +709,7 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
- *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
+ *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
offsetof(struct htab_elem, lru_node) +
@@ -2049,7 +2049,7 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};
-static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
+static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
void *callback_ctx, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
@@ -2089,9 +2089,8 @@ static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
val = elem->key + roundup_key_size;
}
num_elems++;
- ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
- (u64)(long)key, (u64)(long)val,
- (u64)(long)callback_ctx, 0);
+ ret = callback_fn((u64)(long)map, (u64)(long)key,
+ (u64)(long)val, (u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret) {
rcu_read_unlock();
@@ -2397,7 +2396,7 @@ static int htab_of_map_gen_lookup(struct bpf_map *map,
BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
(void *(*)(struct bpf_map *map, void *key))NULL));
- *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
+ *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
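
The callbacks that bpf_for_each_hash_elem() now invokes with a proper type are supplied by BPF programs via the bpf_for_each_map_elem() helper. A BPF-side sketch, assuming the usual libbpf headers; the map, section, and function names are illustrative:

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        struct bpf_map;

        struct {
                __uint(type, BPF_MAP_TYPE_HASH);
                __uint(max_entries, 64);
                __type(key, __u32);
                __type(value, __u64);
        } counters SEC(".maps");

        static long sum_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
        {
                *(__u64 *)ctx += *val;
                return 0;       /* 0 - continue, 1 - stop, as the kernel comment says */
        }

        SEC("tc")
        int sum_counters(struct __sk_buff *skb)
        {
                __u64 total = 0;

                bpf_for_each_map_elem(&counters, sum_cb, &total, 0);
                return 0;
        }

        char LICENSE[] SEC("license") = "GPL";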
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9aabf84afd4b..1ffd469c217f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -979,15 +979,13 @@ out:
return err;
}
-#define MAX_SNPRINTF_VARARGS 12
-
BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
const void *, data, u32, data_len)
{
int err, num_args;
u32 *bin_args;
- if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+ if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
@@ -1058,7 +1056,7 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
struct bpf_map *map = t->map;
void *value = t->value;
- void *callback_fn;
+ bpf_callback_t callback_fn;
void *key;
u32 idx;
@@ -1083,8 +1081,7 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
key = value - round_up(map->key_size, 8);
}
- BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
- (u64)(long)value, 0, 0);
+ callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
/* The verifier checked that return value is zero. */
this_cpu_write(hrtimer_running, NULL);
@@ -1437,6 +1434,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_snprintf_proto;
case BPF_FUNC_task_pt_regs:
return &bpf_task_pt_regs_proto;
+ case BPF_FUNC_trace_vprintk:
+ return bpf_get_trace_vprintk_proto();
default:
return NULL;
}
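
The bpf_get_trace_vprintk_proto() hook wired up above backs the variadic trace_printk from the summary. A BPF-side sketch, assuming 5.16+ headers where bpf_trace_vprintk() is declared; passing more than bpf_trace_printk()'s three arguments is the point:

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        SEC("tp/syscalls/sys_enter_nanosleep")
        int trace_sleep(void *ctx)
        {
                static const char fmt[] = "cpu=%d pid=%d uid=%d gid=%d\n";
                __u64 args[4];

                args[0] = bpf_get_smp_processor_id();
                args[1] = bpf_get_current_pid_tgid() >> 32;
                args[2] = bpf_get_current_uid_gid() & 0xffffffff;
                args[3] = bpf_get_current_uid_gid() >> 32;

                /* data_len must be a multiple of 8, at most MAX_BPRINTF_VARARGS * 8 */
                bpf_trace_vprintk(fmt, sizeof(fmt), args, sizeof(args));
                return 0;
        }

        char LICENSE[] SEC("license") = "GPL";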
diff --git a/kernel/bpf/preload/.gitignore b/kernel/bpf/preload/.gitignore
index 856a4c5ad0dd..9452322902a5 100644
--- a/kernel/bpf/preload/.gitignore
+++ b/kernel/bpf/preload/.gitignore
@@ -1,4 +1,2 @@
-/FEATURE-DUMP.libbpf
-/bpf_helper_defs.h
-/feature
+/libbpf
/bpf_preload_umd
diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
index 1951332dd15f..1400ac58178e 100644
--- a/kernel/bpf/preload/Makefile
+++ b/kernel/bpf/preload/Makefile
@@ -1,21 +1,35 @@
# SPDX-License-Identifier: GPL-2.0
LIBBPF_SRCS = $(srctree)/tools/lib/bpf/
-LIBBPF_A = $(obj)/libbpf.a
-LIBBPF_OUT = $(abspath $(obj))
+LIBBPF_OUT = $(abspath $(obj))/libbpf
+LIBBPF_A = $(LIBBPF_OUT)/libbpf.a
+LIBBPF_DESTDIR = $(LIBBPF_OUT)
+LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
# Although not in use by libbpf's Makefile, set $(O) so that the "dummy" test
# in tools/scripts/Makefile.include always succeeds when building the kernel
# with $(O) pointing to a relative path, as in "make O=build bindeb-pkg".
-$(LIBBPF_A):
- $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
+$(LIBBPF_A): | $(LIBBPF_OUT)
+ $(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ \
+ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
+ $(LIBBPF_OUT)/libbpf.a install_headers
+
+libbpf_hdrs: $(LIBBPF_A)
+
+.PHONY: libbpf_hdrs
+
+$(LIBBPF_OUT):
+ $(call msg,MKDIR,$@)
+ $(Q)mkdir -p $@
userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
- -I $(srctree)/tools/lib/ -Wno-unused-result
+ -I $(LIBBPF_INCLUDE) -Wno-unused-result
userprogs := bpf_preload_umd
-clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
+clean-files := libbpf/
+
+$(obj)/iterators/iterators.o: | libbpf_hdrs
bpf_preload_umd-objs := iterators/iterators.o
bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index 28fa8c1440f4..b8bd60511227 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -1,18 +1,26 @@
# SPDX-License-Identifier: GPL-2.0
OUTPUT := .output
+abs_out := $(abspath $(OUTPUT))
+
CLANG ?= clang
LLC ?= llc
LLVM_STRIP ?= llvm-strip
+
+TOOLS_PATH := $(abspath ../../../../tools)
+BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool
+BPFTOOL_OUTPUT := $(abs_out)/bpftool
DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
-LIBBPF_SRC := $(abspath ../../../../tools/lib/bpf)
-BPFOBJ := $(OUTPUT)/libbpf.a
-BPF_INCLUDE := $(OUTPUT)
-INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../../../tools/lib) \
- -I$(abspath ../../../../tools/include/uapi)
+
+LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf
+LIBBPF_OUTPUT := $(abs_out)/libbpf
+LIBBPF_DESTDIR := $(LIBBPF_OUTPUT)
+LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include
+BPFOBJ := $(LIBBPF_OUTPUT)/libbpf.a
+
+INCLUDES := -I$(OUTPUT) -I$(LIBBPF_INCLUDE) -I$(TOOLS_PATH)/include/uapi
CFLAGS := -g -Wall
-abs_out := $(abspath $(OUTPUT))
ifeq ($(V),1)
Q =
msg =
@@ -44,14 +52,18 @@ $(OUTPUT)/iterators.bpf.o: iterators.bpf.c $(BPFOBJ) | $(OUTPUT)
-c $(filter %.c,$^) -o $@ && \
$(LLVM_STRIP) -g $@
-$(OUTPUT):
+$(OUTPUT) $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT):
$(call msg,MKDIR,$@)
- $(Q)mkdir -p $(OUTPUT)
+ $(Q)mkdir -p $@
-$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)
+$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \
- OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+ OUTPUT=$(abspath $(dir $@))/ prefix= \
+ DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers
-$(DEFAULT_BPFTOOL):
- $(Q)$(MAKE) $(submake_extras) -C ../../../../tools/bpf/bpftool \
- prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install
+$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \
+ OUTPUT=$(BPFTOOL_OUTPUT)/ \
+ LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \
+ LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \
+ prefix= DESTDIR=$(abs_out)/ install-bin
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1cad6979a0d0..50f96ea4452a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -199,7 +199,8 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
err = bpf_fd_reuseport_array_update_elem(map, key, value,
flags);
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
- map->map_type == BPF_MAP_TYPE_STACK) {
+ map->map_type == BPF_MAP_TYPE_STACK ||
+ map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
err = map->ops->map_push_elem(map, value, flags);
} else {
rcu_read_lock();
@@ -238,7 +239,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
- map->map_type == BPF_MAP_TYPE_STACK) {
+ map->map_type == BPF_MAP_TYPE_STACK ||
+ map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
err = map->ops->map_peek_elem(map, value);
} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
/* struct_ops map requires directly updating "value" */
@@ -348,6 +350,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
map->max_entries = attr->max_entries;
map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
map->numa_node = bpf_map_attr_numa_node(attr);
+ map->map_extra = attr->map_extra;
}
static int bpf_map_alloc_id(struct bpf_map *map)
@@ -555,6 +558,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"value_size:\t%u\n"
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
+ "map_extra:\t%#llx\n"
"memlock:\t%lu\n"
"map_id:\t%u\n"
"frozen:\t%u\n",
@@ -563,6 +567,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
map->value_size,
map->max_entries,
map->map_flags,
+ (unsigned long long)map->map_extra,
bpf_map_memory_footprint(map),
map->id,
READ_ONCE(map->frozen));
@@ -812,7 +817,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return ret;
}
-#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
+#define BPF_MAP_CREATE_LAST_FIELD map_extra
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -833,6 +838,10 @@ static int map_create(union bpf_attr *attr)
return -EINVAL;
}
+ if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
+ attr->map_extra != 0)
+ return -EINVAL;
+
f_flags = bpf_get_file_flag(attr->map_flags);
if (f_flags < 0)
return f_flags;
@@ -1082,6 +1091,14 @@ static int map_lookup_elem(union bpf_attr *attr)
if (!value)
goto free_key;
+ if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
+ if (copy_from_user(value, uvalue, value_size))
+ err = -EFAULT;
+ else
+ err = bpf_map_copy_value(map, key, value, attr->flags);
+ goto free_value;
+ }
+
err = bpf_map_copy_value(map, key, value, attr->flags);
if (err)
goto free_value;
@@ -1807,8 +1824,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
return 0;
}
+struct bpf_prog_kstats {
+ u64 nsecs;
+ u64 cnt;
+ u64 misses;
+};
+
static void bpf_prog_get_stats(const struct bpf_prog *prog,
- struct bpf_prog_stats *stats)
+ struct bpf_prog_kstats *stats)
{
u64 nsecs = 0, cnt = 0, misses = 0;
int cpu;
@@ -1821,9 +1844,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
st = per_cpu_ptr(prog->stats, cpu);
do {
start = u64_stats_fetch_begin_irq(&st->syncp);
- tnsecs = st->nsecs;
- tcnt = st->cnt;
- tmisses = st->misses;
+ tnsecs = u64_stats_read(&st->nsecs);
+ tcnt = u64_stats_read(&st->cnt);
+ tmisses = u64_stats_read(&st->misses);
} while (u64_stats_fetch_retry_irq(&st->syncp, start));
nsecs += tnsecs;
cnt += tcnt;
@@ -1839,7 +1862,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
const struct bpf_prog *prog = filp->private_data;
char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
- struct bpf_prog_stats stats;
+ struct bpf_prog_kstats stats;
bpf_prog_get_stats(prog, &stats);
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
@@ -1851,7 +1874,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
"prog_id:\t%u\n"
"run_time_ns:\t%llu\n"
"run_cnt:\t%llu\n"
- "recursion_misses:\t%llu\n",
+ "recursion_misses:\t%llu\n"
+ "verified_insns:\t%u\n",
prog->type,
prog->jited,
prog_tag,
@@ -1859,7 +1883,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
prog->aux->id,
stats.nsecs,
stats.cnt,
- stats.misses);
+ stats.misses,
+ prog->aux->verified_insns);
}
#endif
@@ -3578,7 +3603,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
struct bpf_prog_info info;
u32 info_len = attr->info.info_len;
- struct bpf_prog_stats stats;
+ struct bpf_prog_kstats stats;
char __user *uinsns;
u32 ulen;
int err;
@@ -3628,6 +3653,8 @@ static int bpf_prog_get_info_by_fd(struct file *file,
info.run_cnt = stats.cnt;
info.recursion_misses = stats.misses;
+ info.verified_insns = prog->aux->verified_insns;
+
if (!bpf_capable()) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
@@ -3874,6 +3901,7 @@ static int bpf_map_get_info_by_fd(struct file *file,
info.value_size = map->value_size;
info.max_entries = map->max_entries;
info.map_flags = map->map_flags;
+ info.map_extra = map->map_extra;
memcpy(info.name, map->name, sizeof(map->name));
if (map->btf) {
@@ -4756,6 +4784,31 @@ static const struct bpf_func_proto bpf_sys_close_proto = {
.arg1_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
+{
+ if (flags)
+ return -EINVAL;
+
+ if (name_sz <= 1 || name[name_sz - 1])
+ return -EINVAL;
+
+ if (!bpf_dump_raw_ok(current_cred()))
+ return -EPERM;
+
+ *res = kallsyms_lookup_name(name);
+ return *res ? 0 : -ENOENT;
+}
+
+const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
+ .func = bpf_kallsyms_lookup_name,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_LONG,
+};
+
static const struct bpf_func_proto *
syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -4766,6 +4819,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_btf_find_by_name_kind_proto;
case BPF_FUNC_sys_close:
return &bpf_sys_close_proto;
+ case BPF_FUNC_kallsyms_lookup_name:
+ return &bpf_kallsyms_lookup_name_proto;
default:
return tracing_prog_func_proto(func_id, prog);
}
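
The new helper is only wired up for BPF_PROG_TYPE_SYSCALL programs via the syscall_prog_func_proto switch above. A minimal BPF-side sketch, assuming 5.16+ headers declaring bpf_kallsyms_lookup_name(); the symbol name is just an example:

        #include <linux/bpf.h>
        #include <bpf/bpf_helpers.h>

        SEC("syscall")
        int find_symbol(void *ctx)
        {
                static const char name[] = "bpf_prog_put";
                __u64 addr = 0;

                /* name_sz must include the NUL terminator, flags must be 0 */
                return bpf_kallsyms_lookup_name(name, sizeof(name), 0, &addr);
        }

        char LICENSE[] SEC("license") = "GPL";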
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index fe1e857324e6..e98de5e73ba5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -10,6 +10,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
#include <linux/module.h>
+#include <linux/static_call.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -526,7 +527,7 @@ out:
}
#define NO_START_TIME 1
-static u64 notrace bpf_prog_start_time(void)
+static __always_inline u64 notrace bpf_prog_start_time(void)
{
u64 start = NO_START_TIME;
@@ -544,7 +545,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
stats = this_cpu_ptr(prog->stats);
u64_stats_update_begin(&stats->syncp);
- stats->misses++;
+ u64_stats_inc(&stats->misses);
u64_stats_update_end(&stats->syncp);
}
@@ -585,11 +586,13 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
* Hence check that 'start' is valid.
*/
start > NO_START_TIME) {
+ unsigned long flags;
+
stats = this_cpu_ptr(prog->stats);
- u64_stats_update_begin(&stats->syncp);
- stats->cnt++;
- stats->nsecs += sched_clock() - start;
- u64_stats_update_end(&stats->syncp);
+ flags = u64_stats_update_begin_irqsave(&stats->syncp);
+ u64_stats_inc(&stats->cnt);
+ u64_stats_add(&stats->nsecs, sched_clock() - start);
+ u64_stats_update_end_irqrestore(&stats->syncp, flags);
}
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index de006552be8a..f0dca726ebfd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -612,6 +612,20 @@ static const char *kernel_type_name(const struct btf* btf, u32 id)
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
}
+/* The reg state of a pointer or a bounded scalar was saved when
+ * it was spilled to the stack.
+ */
+static bool is_spilled_reg(const struct bpf_stack_state *stack)
+{
+ return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
+}
+
+static void scrub_spilled_slot(u8 *stype)
+{
+ if (*stype != STACK_INVALID)
+ *stype = STACK_MISC;
+}
+
static void print_verifier_state(struct bpf_verifier_env *env,
const struct bpf_func_state *state)
{
@@ -717,7 +731,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
continue;
verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
print_liveness(env, state->stack[i].spilled_ptr.live);
- if (state->stack[i].slot_type[0] == STACK_SPILL) {
+ if (is_spilled_reg(&state->stack[i])) {
reg = &state->stack[i].spilled_ptr;
t = reg->type;
verbose(env, "=%s", reg_type_str[t]);
@@ -1406,12 +1420,12 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
static bool __reg64_bound_s32(s64 a)
{
- return a > S32_MIN && a < S32_MAX;
+ return a >= S32_MIN && a <= S32_MAX;
}
static bool __reg64_bound_u32(u64 a)
{
- return a > U32_MIN && a < U32_MAX;
+ return a >= U32_MIN && a <= U32_MAX;
}
static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
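
The switch to inclusive comparisons matters because the old exclusive checks rejected the s32/u32 endpoints themselves, needlessly dropping known bounds (this is the bounds-propagation fix from the shortlog). A standalone demonstration:

        #include <stdio.h>
        #include <stdint.h>

        static int old_bound_s32(int64_t a) { return a > INT32_MIN && a < INT32_MAX; }
        static int new_bound_s32(int64_t a) { return a >= INT32_MIN && a <= INT32_MAX; }

        int main(void)
        {
                int64_t v = INT32_MAX;  /* perfectly representable as an s32 */

                /* old: 0 (bounds dropped), new: 1 (bounds kept) */
                printf("old=%d new=%d\n", old_bound_s32(v), new_bound_s32(v));
                return 0;
        }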
@@ -1626,52 +1640,168 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
return env->subprog_cnt - 1;
}
+#define MAX_KFUNC_DESCS 256
+#define MAX_KFUNC_BTFS 256
+
struct bpf_kfunc_desc {
struct btf_func_model func_model;
u32 func_id;
s32 imm;
+ u16 offset;
+};
+
+struct bpf_kfunc_btf {
+ struct btf *btf;
+ struct module *module;
+ u16 offset;
};
-#define MAX_KFUNC_DESCS 256
struct bpf_kfunc_desc_tab {
struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
u32 nr_descs;
};
-static int kfunc_desc_cmp_by_id(const void *a, const void *b)
+struct bpf_kfunc_btf_tab {
+ struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
+ u32 nr_descs;
+};
+
+static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
{
const struct bpf_kfunc_desc *d0 = a;
const struct bpf_kfunc_desc *d1 = b;
/* func_id is not greater than BTF_MAX_TYPE */
- return d0->func_id - d1->func_id;
+ return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
+}
+
+static int kfunc_btf_cmp_by_off(const void *a, const void *b)
+{
+ const struct bpf_kfunc_btf *d0 = a;
+ const struct bpf_kfunc_btf *d1 = b;
+
+ return d0->offset - d1->offset;
}
static const struct bpf_kfunc_desc *
-find_kfunc_desc(const struct bpf_prog *prog, u32 func_id)
+find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
{
struct bpf_kfunc_desc desc = {
.func_id = func_id,
+ .offset = offset,
};
struct bpf_kfunc_desc_tab *tab;
tab = prog->aux->kfunc_tab;
return bsearch(&desc, tab->descs, tab->nr_descs,
- sizeof(tab->descs[0]), kfunc_desc_cmp_by_id);
+ sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
+}
+
+static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
+ s16 offset, struct module **btf_modp)
+{
+ struct bpf_kfunc_btf kf_btf = { .offset = offset };
+ struct bpf_kfunc_btf_tab *tab;
+ struct bpf_kfunc_btf *b;
+ struct module *mod;
+ struct btf *btf;
+ int btf_fd;
+
+ tab = env->prog->aux->kfunc_btf_tab;
+ b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
+ sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
+ if (!b) {
+ if (tab->nr_descs == MAX_KFUNC_BTFS) {
+ verbose(env, "too many different module BTFs\n");
+ return ERR_PTR(-E2BIG);
+ }
+
+ if (bpfptr_is_null(env->fd_array)) {
+ verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
+ return ERR_PTR(-EPROTO);
+ }
+
+ if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
+ offset * sizeof(btf_fd),
+ sizeof(btf_fd)))
+ return ERR_PTR(-EFAULT);
+
+ btf = btf_get_by_fd(btf_fd);
+ if (IS_ERR(btf)) {
+ verbose(env, "invalid module BTF fd specified\n");
+ return btf;
+ }
+
+ if (!btf_is_module(btf)) {
+ verbose(env, "BTF fd for kfunc is not a module BTF\n");
+ btf_put(btf);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mod = btf_try_get_module(btf);
+ if (!mod) {
+ btf_put(btf);
+ return ERR_PTR(-ENXIO);
+ }
+
+ b = &tab->descs[tab->nr_descs++];
+ b->btf = btf;
+ b->module = mod;
+ b->offset = offset;
+
+ sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+ kfunc_btf_cmp_by_off, NULL);
+ }
+ if (btf_modp)
+ *btf_modp = b->module;
+ return b->btf;
+}
+
+void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
+{
+ if (!tab)
+ return;
+
+ while (tab->nr_descs--) {
+ module_put(tab->descs[tab->nr_descs].module);
+ btf_put(tab->descs[tab->nr_descs].btf);
+ }
+ kfree(tab);
+}
+
+static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
+ u32 func_id, s16 offset,
+ struct module **btf_modp)
+{
+ if (offset) {
+ if (offset < 0) {
+ /* In the future, this can be allowed to increase limit
+ * of fd index into fd_array, interpreted as u16.
+ */
+ verbose(env, "negative offset disallowed for kernel module function call\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return __find_kfunc_desc_btf(env, offset, btf_modp);
+ }
+ return btf_vmlinux ?: ERR_PTR(-ENOENT);
}
-static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
+static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
{
const struct btf_type *func, *func_proto;
+ struct bpf_kfunc_btf_tab *btf_tab;
struct bpf_kfunc_desc_tab *tab;
struct bpf_prog_aux *prog_aux;
struct bpf_kfunc_desc *desc;
const char *func_name;
+ struct btf *desc_btf;
unsigned long addr;
int err;
prog_aux = env->prog->aux;
tab = prog_aux->kfunc_tab;
+ btf_tab = prog_aux->kfunc_btf_tab;
if (!tab) {
if (!btf_vmlinux) {
verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
@@ -1699,7 +1829,29 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
prog_aux->kfunc_tab = tab;
}
- if (find_kfunc_desc(env->prog, func_id))
+ /* func_id == 0 is always invalid, but instead of returning an error, be
+ * conservative and wait until the code elimination pass before returning
+ * error, so that invalid calls that get pruned out can be in BPF programs
+ * loaded from userspace. It is also required that offset be untouched
+ * for such calls.
+ */
+ if (!func_id && !offset)
+ return 0;
+
+ if (!btf_tab && offset) {
+ btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
+ if (!btf_tab)
+ return -ENOMEM;
+ prog_aux->kfunc_btf_tab = btf_tab;
+ }
+
+ desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+ if (IS_ERR(desc_btf)) {
+ verbose(env, "failed to find BTF for kernel function\n");
+ return PTR_ERR(desc_btf);
+ }
+
+ if (find_kfunc_desc(env->prog, func_id, offset))
return 0;
if (tab->nr_descs == MAX_KFUNC_DESCS) {
@@ -1707,20 +1859,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
return -E2BIG;
}
- func = btf_type_by_id(btf_vmlinux, func_id);
+ func = btf_type_by_id(desc_btf, func_id);
if (!func || !btf_type_is_func(func)) {
verbose(env, "kernel btf_id %u is not a function\n",
func_id);
return -EINVAL;
}
- func_proto = btf_type_by_id(btf_vmlinux, func->type);
+ func_proto = btf_type_by_id(desc_btf, func->type);
if (!func_proto || !btf_type_is_func_proto(func_proto)) {
verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
func_id);
return -EINVAL;
}
- func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+ func_name = btf_name_by_offset(desc_btf, func->name_off);
addr = kallsyms_lookup_name(func_name);
if (!addr) {
verbose(env, "cannot find address for kernel function %s\n",
@@ -1730,13 +1882,14 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
desc = &tab->descs[tab->nr_descs++];
desc->func_id = func_id;
- desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
- err = btf_distill_func_proto(&env->log, btf_vmlinux,
+ desc->imm = BPF_CALL_IMM(addr);
+ desc->offset = offset;
+ err = btf_distill_func_proto(&env->log, desc_btf,
func_proto, func_name,
&desc->func_model);
if (!err)
sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
- kfunc_desc_cmp_by_id, NULL);
+ kfunc_desc_cmp_by_id_off, NULL);
return err;
}
@@ -1815,7 +1968,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
} else if (bpf_pseudo_call(insn)) {
ret = add_subprog(env, i + insn->imm + 1);
} else {
- ret = add_kfunc_call(env, insn->imm);
+ ret = add_kfunc_call(env, insn->imm, insn->off);
}
if (ret < 0)
@@ -2152,12 +2305,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
{
const struct btf_type *func;
+ struct btf *desc_btf;
if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
return NULL;
- func = btf_type_by_id(btf_vmlinux, insn->imm);
- return btf_name_by_offset(btf_vmlinux, func->name_off);
+ desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+ if (IS_ERR(desc_btf))
+ return "<error>";
+
+ func = btf_type_by_id(desc_btf, insn->imm);
+ return btf_name_by_offset(desc_btf, func->name_off);
}
/* For given verifier state backtrack_insn() is called from the last insn to
@@ -2373,7 +2531,7 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env,
reg->precise = true;
}
for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
- if (func->stack[j].slot_type[0] != STACK_SPILL)
+ if (!is_spilled_reg(&func->stack[j]))
continue;
reg = &func->stack[j].spilled_ptr;
if (reg->type != SCALAR_VALUE)
@@ -2415,7 +2573,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
}
while (spi >= 0) {
- if (func->stack[spi].slot_type[0] != STACK_SPILL) {
+ if (!is_spilled_reg(&func->stack[spi])) {
stack_mask = 0;
break;
}
@@ -2514,7 +2672,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
return 0;
}
- if (func->stack[i].slot_type[0] != STACK_SPILL) {
+ if (!is_spilled_reg(&func->stack[i])) {
stack_mask &= ~(1ull << i);
continue;
}
@@ -2626,15 +2784,21 @@ static bool __is_pointer_value(bool allow_ptr_leaks,
}
static void save_register_state(struct bpf_func_state *state,
- int spi, struct bpf_reg_state *reg)
+ int spi, struct bpf_reg_state *reg,
+ int size)
{
int i;
state->stack[spi].spilled_ptr = *reg;
- state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ if (size == BPF_REG_SIZE)
+ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+
+ for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
+ state->stack[spi].slot_type[i - 1] = STACK_SPILL;
- for (i = 0; i < BPF_REG_SIZE; i++)
- state->stack[spi].slot_type[i] = STACK_SPILL;
+ /* size < 8 bytes spill */
+ for (; i; i--)
+ scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
}
/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
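
What the size-aware spill tracking above buys, expressed with the kernel's instruction macros from include/linux/filter.h (an illustrative fragment, not wired to a loader): previously a sub-8-byte fill yielded an unknown scalar, whereas now the verifier can restore the spilled register's bounds.

        #include <linux/filter.h>

        static const struct bpf_insn spill_fill_demo[] = {
                BPF_MOV64_IMM(BPF_REG_1, 5),                    /* r1 = 5, bounds [5,5] */
                BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -8),  /* 32-bit spill to fp-8 */
                BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -8),  /* fill: r2 keeps [5,5] */
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };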
@@ -2681,7 +2845,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
}
- if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
+ if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
!register_is_null(reg) && env->bpf_capable) {
if (dst_reg != BPF_REG_FP) {
/* The backtracking logic can only recognize explicit
@@ -2694,7 +2858,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
if (err)
return err;
}
- save_register_state(state, spi, reg);
+ save_register_state(state, spi, reg, size);
} else if (reg && is_spillable_regtype(reg->type)) {
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
@@ -2706,16 +2870,16 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
return -EINVAL;
}
- save_register_state(state, spi, reg);
+ save_register_state(state, spi, reg, size);
} else {
u8 type = STACK_MISC;
/* regular write of data into stack destroys any spilled ptr */
state->stack[spi].spilled_ptr.type = NOT_INIT;
/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
- if (state->stack[spi].slot_type[0] == STACK_SPILL)
+ if (is_spilled_reg(&state->stack[spi]))
for (i = 0; i < BPF_REG_SIZE; i++)
- state->stack[spi].slot_type[i] = STACK_MISC;
+ scrub_spilled_slot(&state->stack[spi].slot_type[i]);
/* only mark the slot as written if all 8 bytes were written
* otherwise read propagation may incorrectly stop too soon
@@ -2918,23 +3082,50 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
struct bpf_func_state *state = vstate->frame[vstate->curframe];
int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
struct bpf_reg_state *reg;
- u8 *stype;
+ u8 *stype, type;
stype = reg_state->stack[spi].slot_type;
reg = &reg_state->stack[spi].spilled_ptr;
- if (stype[0] == STACK_SPILL) {
+ if (is_spilled_reg(&reg_state->stack[spi])) {
if (size != BPF_REG_SIZE) {
+ u8 scalar_size = 0;
+
if (reg->type != SCALAR_VALUE) {
verbose_linfo(env, env->insn_idx, "; ");
verbose(env, "invalid size of register fill\n");
return -EACCES;
}
- if (dst_regno >= 0) {
+
+ mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+ if (dst_regno < 0)
+ return 0;
+
+ for (i = BPF_REG_SIZE; i > 0 && stype[i - 1] == STACK_SPILL; i--)
+ scalar_size++;
+
+ if (!(off % BPF_REG_SIZE) && size == scalar_size) {
+ /* The earlier check_reg_arg() has decided the
+ * subreg_def for this insn. Save it first.
+ */
+ s32 subreg_def = state->regs[dst_regno].subreg_def;
+
+ state->regs[dst_regno] = *reg;
+ state->regs[dst_regno].subreg_def = subreg_def;
+ } else {
+ for (i = 0; i < size; i++) {
+ type = stype[(slot - i) % BPF_REG_SIZE];
+ if (type == STACK_SPILL)
+ continue;
+ if (type == STACK_MISC)
+ continue;
+ verbose(env, "invalid read from stack off %d+%d size %d\n",
+ off, i, size);
+ return -EACCES;
+ }
mark_reg_unknown(env, state->regs, dst_regno);
- state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
}
- mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+ state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
return 0;
}
for (i = 1; i < BPF_REG_SIZE; i++) {
@@ -2965,8 +3156,6 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
}
mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
} else {
- u8 type;
-
for (i = 0; i < size; i++) {
type = stype[(slot - i) % BPF_REG_SIZE];
if (type == STACK_MISC)
@@ -4514,17 +4703,17 @@ static int check_stack_range_initialized(
goto mark;
}
- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ if (is_spilled_reg(&state->stack[spi]) &&
state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
goto mark;
- if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+ if (is_spilled_reg(&state->stack[spi]) &&
(state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
env->allow_ptr_leaks)) {
if (clobber) {
__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
- state->stack[spi].slot_type[j] = STACK_MISC;
+ scrub_spilled_slot(&state->stack[spi].slot_type[j]);
}
goto mark;
}
@@ -4813,7 +5002,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
return -EINVAL;
}
break;
-
+ case BPF_MAP_TYPE_BLOOM_FILTER:
+ if (meta->func_id == BPF_FUNC_map_peek_elem)
+ *arg_type = ARG_PTR_TO_MAP_VALUE;
+ break;
default:
break;
}
@@ -5388,6 +5580,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_task_storage_delete)
goto error;
break;
+ case BPF_MAP_TYPE_BLOOM_FILTER:
+ if (func_id != BPF_FUNC_map_peek_elem &&
+ func_id != BPF_FUNC_map_push_elem)
+ goto error;
+ break;
default:
break;
}
@@ -5455,13 +5652,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
map->map_type != BPF_MAP_TYPE_SOCKHASH)
goto error;
break;
- case BPF_FUNC_map_peek_elem:
case BPF_FUNC_map_pop_elem:
- case BPF_FUNC_map_push_elem:
if (map->map_type != BPF_MAP_TYPE_QUEUE &&
map->map_type != BPF_MAP_TYPE_STACK)
goto error;
break;
+ case BPF_FUNC_map_peek_elem:
+ case BPF_FUNC_map_push_elem:
+ if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+ map->map_type != BPF_MAP_TYPE_STACK &&
+ map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
+ goto error;
+ break;
case BPF_FUNC_sk_storage_get:
case BPF_FUNC_sk_storage_delete:
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
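
The three Bloom-filter hunks above wire the new map type to exactly two helpers: bpf_map_push_elem() adds an element, and bpf_map_peek_elem() queries membership, where 0 means "possibly present" (false positives are inherent) and -ENOENT means "definitely absent". A minimal, hypothetical BPF-side sketch (map and program names invented):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
	__uint(max_entries, 1000);
	__type(value, __u32);	/* Bloom filter maps are key-less */
} seen SEC(".maps");

SEC("xdp")
int drop_if_seen(struct xdp_md *ctx)
{
	__u32 val = 42;	/* hypothetical value to test */

	if (bpf_map_peek_elem(&seen, &val) == 0)
		return XDP_DROP;	/* possibly in the set */

	/* flags must be BPF_ANY for this map type */
	bpf_map_push_elem(&seen, &val, BPF_ANY);
	return XDP_PASS;
}

char LICENSE[] SEC("license") = "GPL";
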
@@ -6485,23 +6687,33 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
u32 i, nargs, func_id, ptr_type_id;
+ struct module *btf_mod = NULL;
const struct btf_param *args;
+ struct btf *desc_btf;
int err;
+ /* skip for now, but return error when we find this in fixup_kfunc_call */
+ if (!insn->imm)
+ return 0;
+
+ desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+ if (IS_ERR(desc_btf))
+ return PTR_ERR(desc_btf);
+
func_id = insn->imm;
- func = btf_type_by_id(btf_vmlinux, func_id);
- func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
- func_proto = btf_type_by_id(btf_vmlinux, func->type);
+ func = btf_type_by_id(desc_btf, func_id);
+ func_name = btf_name_by_offset(desc_btf, func->name_off);
+ func_proto = btf_type_by_id(desc_btf, func->type);
if (!env->ops->check_kfunc_call ||
- !env->ops->check_kfunc_call(func_id)) {
+ !env->ops->check_kfunc_call(func_id, btf_mod)) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
/* Check the arguments */
- err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs);
+ err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
if (err)
return err;
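
Note the widened callback: check_kfunc_call() now also receives the module that owns the kfunc's BTF (NULL for vmlinux), so each program type can allow-list module kfuncs separately. Presumably the hook in struct bpf_verifier_ops now reads roughly:

/* Sketch of the updated verifier hook (declared in include/linux/bpf.h
 * by this series): 'owner' is the module providing the kfunc's BTF,
 * or NULL when the kfunc comes from vmlinux BTF.
 */
bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
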
@@ -6509,15 +6721,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
mark_reg_not_init(env, regs, caller_saved[i]);
/* Check return type */
- t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL);
+ t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
if (btf_type_is_scalar(t)) {
mark_reg_unknown(env, regs, BPF_REG_0);
mark_btf_func_reg_size(env, BPF_REG_0, t->size);
} else if (btf_type_is_ptr(t)) {
- ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type,
+ ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
&ptr_type_id);
if (!btf_type_is_struct(ptr_type)) {
- ptr_type_name = btf_name_by_offset(btf_vmlinux,
+ ptr_type_name = btf_name_by_offset(desc_btf,
ptr_type->name_off);
verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
func_name, btf_type_str(ptr_type),
@@ -6525,7 +6737,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
mark_reg_known_zero(env, regs, BPF_REG_0);
- regs[BPF_REG_0].btf = btf_vmlinux;
+ regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
@@ -6536,7 +6748,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
for (i = 0; i < nargs; i++) {
u32 regno = i + 1;
- t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL);
+ t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
if (btf_type_is_ptr(t))
mark_btf_func_reg_size(env, regno, sizeof(void *));
else
@@ -10356,9 +10568,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
* return false to continue verification of this path
*/
return false;
- if (i % BPF_REG_SIZE)
+ if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
continue;
- if (old->stack[spi].slot_type[0] != STACK_SPILL)
+ if (!is_spilled_reg(&old->stack[spi]))
continue;
if (!regsafe(env, &old->stack[spi].spilled_ptr,
&cur->stack[spi].spilled_ptr, idmap))
@@ -10565,7 +10777,7 @@ static int propagate_precision(struct bpf_verifier_env *env,
}
for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- if (state->stack[i].slot_type[0] != STACK_SPILL)
+ if (!is_spilled_reg(&state->stack[i]))
continue;
state_reg = &state->stack[i].spilled_ptr;
if (state_reg->type != SCALAR_VALUE ||
@@ -11076,7 +11288,8 @@ static int do_check(struct bpf_verifier_env *env)
env->jmps_processed++;
if (opcode == BPF_CALL) {
if (BPF_SRC(insn->code) != BPF_K ||
- insn->off != 0 ||
+ (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
+ && insn->off != 0) ||
(insn->src_reg != BPF_REG_0 &&
insn->src_reg != BPF_PSEUDO_CALL &&
insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
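
The relaxed check reflects the new call encoding: for BPF_PSEUDO_KFUNC_CALL instructions, a non-zero insn->off is now legal and selects which BTF insn->imm refers to. A hypothetical encoding, under the assumption (matching the off parameter threaded through find_kfunc_desc_btf() above) that off names a module-BTF fd slot supplied via fd_array, with 0 meaning vmlinux BTF:

struct bpf_insn kfunc_call = {
	.code    = BPF_JMP | BPF_CALL,
	.src_reg = BPF_PSEUDO_KFUNC_CALL,
	.off     = 0,	/* 0 => vmlinux BTF; non-zero => module BTF fd slot */
	.imm     = 1234,	/* hypothetical kfunc BTF type id */
};
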
@@ -12432,6 +12645,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
+ func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
func[i]->aux->linfo = prog->aux->linfo;
func[i]->aux->nr_linfo = prog->aux->nr_linfo;
func[i]->aux->jited_linfo = prog->aux->jited_linfo;
@@ -12469,8 +12683,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
if (!bpf_pseudo_call(insn))
continue;
subprog = insn->off;
- insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
}
/* we use the aux data to keep a list of the start addresses
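
BPF_CALL_IMM() replaces the repeated "BPF_CAST_CALL(fn) - __bpf_call_base" pattern here and in do_misc_fixups() below; presumably it is defined in include/linux/filter.h along these lines:

/* A BPF_CALL immediate is the target's displacement from
 * __bpf_call_base, so the interpreter/JIT can reconstruct the
 * absolute address from the 32-bit imm field.
 */
#define BPF_CALL_IMM(x)	((void *)(x) - (void *)__bpf_call_base)
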
@@ -12618,10 +12831,15 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env,
{
const struct bpf_kfunc_desc *desc;
+ if (!insn->imm) {
+ verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
+ return -EINVAL;
+ }
+
/* insn->imm has the btf func_id. Replace it with
* an address (relative to __bpf_call_base).
*/
- desc = find_kfunc_desc(env->prog, insn->imm);
+ desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
if (!desc) {
verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
insn->imm);
@@ -12902,7 +13120,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_map_push_elem ||
insn->imm == BPF_FUNC_map_pop_elem ||
insn->imm == BPF_FUNC_map_peek_elem ||
- insn->imm == BPF_FUNC_redirect_map)) {
+ insn->imm == BPF_FUNC_redirect_map ||
+ insn->imm == BPF_FUNC_for_each_map_elem)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@@ -12946,36 +13165,37 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_redirect,
(int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
+ (int (*)(struct bpf_map *map,
+ bpf_callback_t callback_fn,
+ void *callback_ctx,
+ u64 flags))NULL));
patch_map_ops_generic:
switch (insn->imm) {
case BPF_FUNC_map_lookup_elem:
- insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
continue;
case BPF_FUNC_map_update_elem:
- insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_update_elem);
continue;
case BPF_FUNC_map_delete_elem:
- insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
continue;
case BPF_FUNC_map_push_elem:
- insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_push_elem);
continue;
case BPF_FUNC_map_pop_elem:
- insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
continue;
case BPF_FUNC_map_peek_elem:
- insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
continue;
case BPF_FUNC_redirect_map:
- insn->imm = BPF_CAST_CALL(ops->map_redirect) -
- __bpf_call_base;
+ insn->imm = BPF_CALL_IMM(ops->map_redirect);
+ continue;
+ case BPF_FUNC_for_each_map_elem:
+ insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
continue;
}
@@ -13826,6 +14046,7 @@ skip_full_check:
env->verification_time = ktime_get_ns() - start_time;
print_verification_stats(env);
+ env->prog->aux->verified_insns = env->insn_processed;
if (log->level && bpf_verifier_log_full(log))
ret = -ENOSPC;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a89b01b7e59a..f2253ea729a2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13491,3 +13491,5 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
.threaded = true,
};
#endif /* CONFIG_CGROUP_PERF */
+
+DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
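
DEFINE_STATIC_CALL_RET0() gives the new perf_snapshot_branch_stack static call a return-0 default, i.e. "no branch entries captured", until an architecture (x86 in this series) installs a real snapshot routine. The matching declaration presumably lives in include/linux/perf_event.h, roughly:

/* Sketch of the declaration side: callers invoke
 * static_call(perf_snapshot_branch_stack)(entries, cnt) and get 0
 * entries back until an arch patches in an implementation.
 */
typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
					   unsigned int cnt);
DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
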
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8e2eb950aa82..7396488793ff 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
.arg2_type = ARG_CONST_SIZE,
};
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+static void __set_printk_clr_event(void)
{
/*
* This program might be calling bpf_trace_printk,
@@ -410,11 +410,57 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
*/
if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
pr_warn_ratelimited("could not enable bpf_trace_printk events");
+}
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+ __set_printk_clr_event();
return &bpf_trace_printk_proto;
}
-#define MAX_SEQ_PRINTF_VARARGS 12
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+ u32, data_len)
+{
+ static char buf[BPF_TRACE_PRINTK_SIZE];
+ unsigned long flags;
+ int ret, num_args;
+ u32 *bin_args;
+
+ if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
+ num_args = data_len / 8;
+
+ ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+ if (ret < 0)
+ return ret;
+
+ raw_spin_lock_irqsave(&trace_printk_lock, flags);
+ ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+
+ trace_bpf_trace_printk(buf);
+ raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+ bpf_bprintf_cleanup();
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_trace_vprintk_proto = {
+ .func = bpf_trace_vprintk,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_MEM,
+ .arg2_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
+ .arg4_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
+{
+ __set_printk_clr_event();
+ return &bpf_trace_vprintk_proto;
+}
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
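
Unlike bpf_trace_printk(), which is capped at three format arguments, the new bpf_trace_vprintk() takes its arguments as an array of u64s, up to MAX_BPRINTF_VARARGS (12) of them per the -EINVAL check above. A hypothetical fragment of a GPL-licensed BPF program (the helper is gpl_only):

static const char fmt[] = "a=%d b=%d c=%d d=%d\n";
__u64 args[4] = { 1, 2, 3, 4 };	/* data_len must be a multiple of 8 */

bpf_trace_vprintk(fmt, sizeof(fmt), args, sizeof(args));
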
@@ -422,7 +468,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
int err, num_args;
u32 *bin_args;
- if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+ if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
@@ -1017,6 +1063,34 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
+{
+#ifndef CONFIG_X86
+ return -ENOENT;
+#else
+ static const u32 br_entry_size = sizeof(struct perf_branch_entry);
+ u32 entry_cnt = size / br_entry_size;
+
+ entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);
+
+ if (unlikely(flags))
+ return -EINVAL;
+
+ if (!entry_cnt)
+ return -ENOENT;
+
+ return entry_cnt * br_entry_size;
+#endif
+}
+
+static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+ .func = bpf_get_branch_snapshot,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
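
A hypothetical caller, sketched as a fragment of a perf_event BPF program with types from vmlinux.h: the positive return value is the number of bytes filled, always a multiple of sizeof(struct perf_branch_entry):

struct perf_branch_entry entries[16];
long ret;

ret = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
if (ret < 0)
	return 0;	/* non-x86 kernel, or no branch records */
/* ret / sizeof(entries[0]) entries are valid here */
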
@@ -1132,6 +1206,10 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_snprintf_proto;
case BPF_FUNC_get_func_ip:
return &bpf_get_func_ip_proto_tracing;
+ case BPF_FUNC_get_branch_snapshot:
+ return &bpf_get_branch_snapshot_proto;
+ case BPF_FUNC_trace_vprintk:
+ return bpf_get_trace_vprintk_proto();
default:
return bpf_base_func_proto(func_id);
}
@@ -1530,6 +1608,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_to_tcp_request_sock_proto;
case BPF_FUNC_skc_to_udp6_sock:
return &bpf_skc_to_udp6_sock_proto;
+ case BPF_FUNC_skc_to_unix_sock:
+ return &bpf_skc_to_unix_sock_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_tracing_proto;
case BPF_FUNC_sk_storage_delete:
@@ -1566,13 +1646,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
- if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
- return false;
- if (type != BPF_READ)
- return false;
- if (off % size != 0)
- return false;
- return true;
+ return bpf_tracing_ctx_access(off, size, type);
}
static bool tracing_prog_is_valid_access(int off, int size,
@@ -1580,13 +1654,7 @@ static bool tracing_prog_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
- if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
- return false;
- if (type != BPF_READ)
- return false;
- if (off % size != 0)
- return false;
- return btf_ctx_access(off, size, type, prog, info);
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
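
Both hunks above collapse the same open-coded bounds/read/alignment checks into shared helpers; judging by the deleted lines, these are presumably static inlines in include/linux/bpf.h of roughly this shape:

static inline bool bpf_tracing_ctx_access(int off, int size,
					  enum bpf_access_type type)
{
	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

static inline bool bpf_tracing_btf_ctx_access(int off, int size,
					      enum bpf_access_type type,
					      const struct bpf_prog *prog,
					      struct bpf_insn_access_aux *info)
{
	if (!bpf_tracing_ctx_access(off, size, type))
		return false;
	return btf_ctx_access(off, size, type, prog, info);
}
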