summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org> 2022-12-12 11:27:41 -0800
committer: Jakub Kicinski <kuba@kernel.org> 2022-12-12 11:27:42 -0800
commit: 26f708a28454df2062a63fd869e983c379f50ff0 (patch)
tree: e9580092e7d69af3f9d5add0cd331bad2a6bf708 /kernel
parent: b2b509fb5a1e6af1e630a755b32c4658099df70b (diff)
parent: 99523094de48df65477cbbb9d8027f4bc4701794 (diff)
download: linux-26f708a28454df2062a63fd869e983c379f50ff0.tar.bz2
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2022-12-11

We've added 74 non-merge commits during the last 11 day(s) which contain
a total of 88 files changed, 3362 insertions(+), 789 deletions(-).

The main changes are:

1) Decouple prune and jump points handling in the verifier, from Andrii.

2) Do not rely on ALLOW_ERROR_INJECTION for fmod_ret, from Benjamin.
   Merged from hid tree.

3) Do not zero-extend kfunc return values. Necessary fix for 32-bit archs,
   from Björn.

4) Don't use rcu_users to refcount in task kfuncs, from David.

5) Three reg_state->id fixes in the verifier, from Eduard.

6) Optimize bpf_mem_alloc by reusing elements from free_by_rcu, from Hou.

7) Refactor dynptr handling in the verifier, from Kumar.

8) Remove the "/sys" mount and umount dance in {open,close}_netns
   in bpf selftests, from Martin.

9) Enable sleepable support for cgrp local storage, from Yonghong.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (74 commits)
  selftests/bpf: test case for relaxed prunning of active_lock.id
  selftests/bpf: Add pruning test case for bpf_spin_lock
  bpf: use check_ids() for active_lock comparison
  selftests/bpf: verify states_equal() maintains idmap across all frames
  bpf: states_equal() must build idmap for all function frames
  selftests/bpf: test cases for regsafe() bug skipping check_id()
  bpf: regsafe() must not skip check_ids()
  docs/bpf: Add documentation for BPF_MAP_TYPE_SK_STORAGE
  selftests/bpf: Add test for dynptr reinit in user_ringbuf callback
  bpf: Use memmove for bpf_dynptr_{read,write}
  bpf: Move PTR_TO_STACK alignment check to process_dynptr_func
  bpf: Rework check_func_arg_reg_off
  bpf: Rework process_dynptr_func
  bpf: Propagate errors from process_* checks in check_func_arg
  bpf: Refactor ARG_PTR_TO_DYNPTR checks into process_dynptr_func
  bpf: Skip rcu_barrier() if rcu_trace_implies_rcu_gp() is true
  bpf: Reuse freed element in free_by_rcu during allocation
  selftests/bpf: Bring test_offload.py back to life
  bpf: Fix comment error in fixup_kfunc_call function
  bpf: Do not zero-extend kfunc return values
  ...
====================

Link: https://lore.kernel.org/r/20221212024701.73809-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/bpf_cgrp_storage.c 3
-rw-r--r-- kernel/bpf/bpf_inode_storage.c 4
-rw-r--r-- kernel/bpf/bpf_lsm.c 16
-rw-r--r-- kernel/bpf/bpf_task_storage.c 4
-rw-r--r-- kernel/bpf/btf.c 32
-rw-r--r-- kernel/bpf/helpers.c 118
-rw-r--r-- kernel/bpf/memalloc.c 31
-rw-r--r-- kernel/bpf/verifier.c 696
8 files changed, 605 insertions, 299 deletions
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 309403800f82..6cdf6d9ed91d 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -211,7 +211,6 @@ BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgro
return ret;
}
-BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_local_storage_map)
const struct bpf_map_ops cgrp_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -222,7 +221,7 @@ const struct bpf_map_ops cgrp_storage_map_ops = {
.map_update_elem = bpf_cgrp_storage_update_elem,
.map_delete_elem = bpf_cgrp_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &cgroup_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = cgroup_storage_ptr,
};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 6a1d4d22816a..05f4c66c9089 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -213,8 +213,6 @@ static void inode_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(map, &inode_cache, NULL);
}
-BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct,
- bpf_local_storage_map)
const struct bpf_map_ops inode_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -225,7 +223,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
.map_update_elem = bpf_fd_inode_storage_update_elem,
.map_delete_elem = bpf_fd_inode_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &inode_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = inode_storage_ptr,
};
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index ae0267f150b5..9ea42a45da47 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -345,11 +345,27 @@ BTF_ID(func, bpf_lsm_task_to_inode)
BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)
+BTF_SET_START(untrusted_lsm_hooks)
+BTF_ID(func, bpf_lsm_bpf_map_free_security)
+BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
+BTF_ID(func, bpf_lsm_bpf_prog_free_security)
+BTF_ID(func, bpf_lsm_file_alloc_security)
+BTF_ID(func, bpf_lsm_file_free_security)
+BTF_ID(func, bpf_lsm_sk_alloc_security)
+BTF_ID(func, bpf_lsm_sk_free_security)
+BTF_ID(func, bpf_lsm_task_free)
+BTF_SET_END(untrusted_lsm_hooks)
+
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
{
return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
}
+bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
+{
+ return !btf_id_set_contains(&untrusted_lsm_hooks, prog->aux->attach_btf_id);
+}
+
const struct bpf_prog_ops lsm_prog_ops = {
};
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 8e832db8151a..1e486055a523 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -324,7 +324,7 @@ static void task_storage_map_free(struct bpf_map *map)
bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
}
-BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
+BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
const struct bpf_map_ops task_storage_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = bpf_local_storage_map_alloc_check,
@@ -335,7 +335,7 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_update_elem = bpf_pid_task_storage_update_elem,
.map_delete_elem = bpf_pid_task_storage_delete_elem,
.map_check_btf = bpf_local_storage_map_check_btf,
- .map_btf_id = &task_storage_map_btf_ids[0],
+ .map_btf_id = &bpf_local_storage_map_btf_id[0],
.map_owner_storage_ptr = task_storage_ptr,
};
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index d11cbf8cece7..f7dd8af06413 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -19,6 +19,7 @@
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
+#include <linux/bpf_lsm.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
@@ -205,6 +206,7 @@ enum btf_kfunc_hook {
BTF_KFUNC_HOOK_STRUCT_OPS,
BTF_KFUNC_HOOK_TRACING,
BTF_KFUNC_HOOK_SYSCALL,
+ BTF_KFUNC_HOOK_FMODRET,
BTF_KFUNC_HOOK_MAX,
};
@@ -5829,6 +5831,7 @@ static bool prog_args_trusted(const struct bpf_prog *prog)
case BPF_PROG_TYPE_TRACING:
return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER;
case BPF_PROG_TYPE_LSM:
+ return bpf_lsm_is_trusted(prog);
case BPF_PROG_TYPE_STRUCT_OPS:
return true;
default:
@@ -7606,11 +7609,14 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf,
return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
}
-/* This function must be invoked only from initcalls/module init functions */
-int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
- const struct btf_kfunc_id_set *kset)
+u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id)
+{
+ return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id);
+}
+
+static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
+ const struct btf_kfunc_id_set *kset)
{
- enum btf_kfunc_hook hook;
struct btf *btf;
int ret;
@@ -7629,13 +7635,29 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
if (IS_ERR(btf))
return PTR_ERR(btf);
- hook = bpf_prog_type_to_kfunc_hook(prog_type);
ret = btf_populate_kfunc_set(btf, hook, kset->set);
btf_put(btf);
return ret;
}
+
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
+ const struct btf_kfunc_id_set *kset)
+{
+ enum btf_kfunc_hook hook;
+
+ hook = bpf_prog_type_to_kfunc_hook(prog_type);
+ return __register_btf_kfunc_id_set(hook, kset);
+}
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
+/* This function must be invoked only from initcalls/module init functions */
+int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset)
+{
+ return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset);
+}
+EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set);
+
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
{
struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a5a511430f2a..af30c6cbd65d 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1404,7 +1404,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
#define DYNPTR_SIZE_MASK 0xFFFFFF
#define DYNPTR_RDONLY_BIT BIT(31)
-static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
+static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_RDONLY_BIT;
}
@@ -1414,7 +1414,7 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
ptr->size |= type << DYNPTR_TYPE_SHIFT;
}
-u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
+u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
}
@@ -1438,7 +1438,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
memset(ptr, 0, sizeof(*ptr));
}
-static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
{
u32 size = bpf_dynptr_get_size(ptr);
@@ -1483,7 +1483,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
};
-BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
+BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
int err;
@@ -1495,7 +1495,11 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
if (err)
return err;
- memcpy(dst, src->data + src->offset + offset, len);
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst, src->data + src->offset + offset, len);
return 0;
}
@@ -1506,12 +1510,12 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
- .arg3_type = ARG_PTR_TO_DYNPTR,
+ .arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
.arg4_type = ARG_ANYTHING,
.arg5_type = ARG_ANYTHING,
};
-BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
+BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
int err;
@@ -1523,7 +1527,11 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
if (err)
return err;
- memcpy(dst->data + dst->offset + offset, src, len);
+ /* Source and destination may possibly overlap, hence use memmove to
+ * copy the data. E.g. bpf_dynptr_from_mem may create two dynptr
+ * pointing to overlapping PTR_TO_MAP_VALUE regions.
+ */
+ memmove(dst->data + dst->offset + offset, src, len);
return 0;
}
@@ -1532,14 +1540,14 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
.func = bpf_dynptr_write,
.gpl_only = false,
.ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_DYNPTR,
+ .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
.arg5_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
+BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
int err;
@@ -1560,7 +1568,7 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
.func = bpf_dynptr_data,
.gpl_only = false,
.ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
- .arg1_type = ARG_PTR_TO_DYNPTR,
+ .arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
};
@@ -1833,8 +1841,59 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
*/
struct task_struct *bpf_task_acquire(struct task_struct *p)
{
- refcount_inc(&p->rcu_users);
- return p;
+ return get_task_struct(p);
+}
+
+/**
+ * bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task
+ * acquired by this kfunc which is not stored in a map as a kptr, must be
+ * released by calling bpf_task_release().
+ * @p: The task on which a reference is being acquired.
+ */
+struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
+{
+ /* For the time being this function returns NULL, as it's not currently
+ * possible to safely acquire a reference to a task with RCU protection
+ * using get_task_struct() and put_task_struct(). This is due to the
+ * slightly odd mechanics of p->rcu_users, and how task RCU protection
+ * works.
+ *
+ * A struct task_struct is refcounted by two different refcount_t
+ * fields:
+ *
+ * 1. p->usage: The "true" refcount field which tracks a task's
+ * lifetime. The task is freed as soon as this
+ * refcount drops to 0.
+ *
+ * 2. p->rcu_users: An "RCU users" refcount field which is statically
+ * initialized to 2, and is co-located in a union with
+ * a struct rcu_head field (p->rcu). p->rcu_users
+ * essentially encapsulates a single p->usage
+ * refcount, and when p->rcu_users goes to 0, an RCU
+ * callback is scheduled on the struct rcu_head which
+ * decrements the p->usage refcount.
+ *
+ * There are two important implications to this task refcounting logic
+ * described above. The first is that
+ * refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as
+ * after the refcount goes to 0, the RCU callback being scheduled will
+ * cause the memory backing the refcount to again be nonzero due to the
+ * fields sharing a union. The other is that we can't rely on RCU to
+ * guarantee that a task is valid in a BPF program. This is because a
+ * task could have already transitioned to being in the TASK_DEAD
+ * state, had its rcu_users refcount go to 0, and its rcu callback
+ * invoked in which it drops its single p->usage reference. At this
+ * point the task will be freed as soon as the last p->usage reference
+ * goes to 0, without waiting for another RCU gp to elapse. The only
+ * way that a BPF program can guarantee that a task is valid in this
+ * scenario is to hold a p->usage refcount itself.
+ *
+ * Until we're able to resolve this issue, either by pulling
+ * p->rcu_users and p->rcu out of the union, or by getting rid of
+ * p->usage and just using p->rcu_users for refcounting, we'll just
+ * return NULL here.
+ */
+ return NULL;
}
/**
@@ -1845,33 +1904,15 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
*/
struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
{
- struct task_struct *p;
-
- rcu_read_lock();
- p = READ_ONCE(*pp);
-
- /* Another context could remove the task from the map and release it at
- * any time, including after we've done the lookup above. This is safe
- * because we're in an RCU read region, so the task is guaranteed to
- * remain valid until at least the rcu_read_unlock() below.
+ /* We must return NULL here until we have clarity on how to properly
+ * leverage RCU for ensuring a task's lifetime. See the comment above
+ * in bpf_task_acquire_not_zero() for more details.
*/
- if (p && !refcount_inc_not_zero(&p->rcu_users))
- /* If the task had been removed from the map and freed as
- * described above, refcount_inc_not_zero() will return false.
- * The task will be freed at some point after the current RCU
- * gp has ended, so just return NULL to the user.
- */
- p = NULL;
- rcu_read_unlock();
-
- return p;
+ return NULL;
}
/**
- * bpf_task_release - Release the reference acquired on a struct task_struct *.
- * If this kfunc is invoked in an RCU read region, the task_struct is
- * guaranteed to not be freed until the current grace period has ended, even if
- * its refcount drops to 0.
+ * bpf_task_release - Release the reference acquired on a task.
* @p: The task on which a reference is being released.
*/
void bpf_task_release(struct task_struct *p)
@@ -1879,7 +1920,7 @@ void bpf_task_release(struct task_struct *p)
if (!p)
return;
- put_task_struct_rcu_user(p);
+ put_task_struct(p);
}
#ifdef CONFIG_CGROUPS
@@ -1927,7 +1968,7 @@ struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
}
/**
- * bpf_cgroup_release - Release the reference acquired on a struct cgroup *.
+ * bpf_cgroup_release - Release the reference acquired on a cgroup.
* If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
* not be freed until the current grace period has ended, even if its refcount
* drops to 0.
@@ -2013,6 +2054,7 @@ BTF_ID_FLAGS(func, bpf_list_push_back)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
#ifdef CONFIG_CGROUPS
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 8f0d65f2474a..ebcc3dd0fa19 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -171,9 +171,24 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
memcg = get_memcg(c);
old_memcg = set_active_memcg(memcg);
for (i = 0; i < cnt; i++) {
- obj = __alloc(c, node);
- if (!obj)
- break;
+ /*
+ * free_by_rcu is only manipulated by irq work refill_work().
+ * IRQ works on the same CPU are called sequentially, so it is
+ * safe to use __llist_del_first() here. If alloc_bulk() is
+ * invoked by the initial prefill, there will be no running
+ * refill_work(), so __llist_del_first() is fine as well.
+ *
+ * In most cases, objects on free_by_rcu are from the same CPU.
+ * If some objects come from other CPUs, it doesn't incur any
+ * harm because NUMA_NO_NODE means the preference for current
+ * numa node and it is not a guarantee.
+ */
+ obj = __llist_del_first(&c->free_by_rcu);
+ if (!obj) {
+ obj = __alloc(c, node);
+ if (!obj)
+ break;
+ }
if (IS_ENABLED(CONFIG_PREEMPT_RT))
/* In RT irq_work runs in per-cpu kthread, so disable
* interrupts to avoid preemption and interrupts and
@@ -449,9 +464,17 @@ static void free_mem_alloc(struct bpf_mem_alloc *ma)
{
/* waiting_for_gp lists was drained, but __free_rcu might
* still execute. Wait for it now before we freeing percpu caches.
+ *
+ * rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
+ * but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
+ * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
+ * so if call_rcu(head, __free_rcu) is skipped due to
+ * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
+ * using rcu_trace_implies_rcu_gp() as well.
*/
rcu_barrier_tasks_trace();
- rcu_barrier();
+ if (!rcu_trace_implies_rcu_gp())
+ rcu_barrier();
free_mem_alloc_no_barrier(ma);
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4e7f1d085e53..a5255a0dcbb6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -451,6 +451,11 @@ static bool reg_type_not_null(enum bpf_reg_type type)
type == PTR_TO_SOCK_COMMON;
}
+static bool type_is_ptr_alloc_obj(u32 type)
+{
+ return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
+}
+
static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
{
struct btf_record *rec = NULL;
@@ -458,7 +463,7 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
if (reg->type == PTR_TO_MAP_VALUE) {
rec = reg->map_ptr->record;
- } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ } else if (type_is_ptr_alloc_obj(reg->type)) {
meta = btf_find_struct_meta(reg->btf, reg->btf_id);
if (meta)
rec = meta->record;
@@ -587,7 +592,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
[PTR_TO_BUF] = "buf",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
- [PTR_TO_DYNPTR] = "dynptr_ptr",
+ [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
};
if (type & PTR_MAYBE_NULL) {
@@ -720,6 +725,28 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
return type == BPF_DYNPTR_TYPE_RINGBUF;
}
+static void __mark_dynptr_reg(struct bpf_reg_state *reg,
+ enum bpf_dynptr_type type,
+ bool first_slot);
+
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
+
+static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
+ struct bpf_reg_state *sreg2,
+ enum bpf_dynptr_type type)
+{
+ __mark_dynptr_reg(sreg1, type, true);
+ __mark_dynptr_reg(sreg2, type, false);
+}
+
+static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
+ enum bpf_dynptr_type type)
+{
+ __mark_dynptr_reg(reg, type, true);
+}
+
+
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
enum bpf_arg_type arg_type, int insn_idx)
{
@@ -741,9 +768,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
if (type == BPF_DYNPTR_TYPE_INVALID)
return -EINVAL;
- state->stack[spi].spilled_ptr.dynptr.first_slot = true;
- state->stack[spi].spilled_ptr.dynptr.type = type;
- state->stack[spi - 1].spilled_ptr.dynptr.type = type;
+ mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
+ &state->stack[spi - 1].spilled_ptr, type);
if (dynptr_type_refcounted(type)) {
/* The id is used to track proper releasing */
@@ -751,8 +777,8 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
if (id < 0)
return id;
- state->stack[spi].spilled_ptr.id = id;
- state->stack[spi - 1].spilled_ptr.id = id;
+ state->stack[spi].spilled_ptr.ref_obj_id = id;
+ state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
}
return 0;
@@ -774,25 +800,23 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
}
/* Invalidate any slices associated with this dynptr */
- if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
- release_reference(env, state->stack[spi].spilled_ptr.id);
- state->stack[spi].spilled_ptr.id = 0;
- state->stack[spi - 1].spilled_ptr.id = 0;
- }
-
- state->stack[spi].spilled_ptr.dynptr.first_slot = false;
- state->stack[spi].spilled_ptr.dynptr.type = 0;
- state->stack[spi - 1].spilled_ptr.dynptr.type = 0;
+ if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
+ WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
+ __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
+ __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
return 0;
}
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
- int i;
+ int spi, i;
+
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return false;
+ spi = get_spi(reg->off);
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
return true;
@@ -805,13 +829,17 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
return true;
}
-bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
+ int spi;
int i;
+ /* This already represents first slot of initialized bpf_dynptr */
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return true;
+
+ spi = get_spi(reg->off);
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
!state->stack[spi].spilled_ptr.dynptr.first_slot)
return false;
@@ -825,21 +853,24 @@ bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
return true;
}
-bool is_dynptr_type_expected(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg,
- enum bpf_arg_type arg_type)
+static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ enum bpf_arg_type arg_type)
{
struct bpf_func_state *state = func(env, reg);
enum bpf_dynptr_type dynptr_type;
- int spi = get_spi(reg->off);
+ int spi;
/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
if (arg_type == ARG_PTR_TO_DYNPTR)
return true;
dynptr_type = arg_to_dynptr_type(arg_type);
-
- return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
+ if (reg->type == CONST_PTR_TO_DYNPTR) {
+ return reg->dynptr.type == dynptr_type;
+ } else {
+ spi = get_spi(reg->off);
+ return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
+ }
}
/* The reg state of a pointer or a bounded scalar was saved when
@@ -1351,9 +1382,6 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void __mark_reg_not_init(const struct bpf_verifier_env *env,
- struct bpf_reg_state *reg);
-
/* This helper doesn't clear reg->id */
static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
@@ -1416,6 +1444,19 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
__mark_reg_known_zero(regs + regno);
}
+static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
+ bool first_slot)
+{
+ /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
+ * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
+ * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
+ */
+ __mark_reg_known_zero(reg);
+ reg->type = CONST_PTR_TO_DYNPTR;
+ reg->dynptr.type = type;
+ reg->dynptr.first_slot = first_slot;
+}
+
static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
{
if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
@@ -2525,6 +2566,16 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
return 0;
}
+static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
+{
+ env->insn_aux_data[idx].jmp_point = true;
+}
+
+static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].jmp_point;
+}
+
/* for any branch, call, exit record the history of jmps in the given state */
static int push_jmp_history(struct bpf_verifier_env *env,
struct bpf_verifier_state *cur)
@@ -2533,6 +2584,9 @@ static int push_jmp_history(struct bpf_verifier_env *env,
struct bpf_idx_pair *p;
size_t alloc_size;
+ if (!is_jmp_point(env, env->insn_idx))
+ return 0;
+
cnt++;
alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
@@ -4275,7 +4329,7 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
return true;
/* If a register is not referenced, it is trusted if it has the
- * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
+ * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
* other type modifiers may be safe, but we elect to take an opt-in
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
* not.
@@ -4287,6 +4341,11 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
!bpf_type_has_unsafe_modifiers(reg->type);
}
+static bool is_rcu_reg(const struct bpf_reg_state *reg)
+{
+ return reg->type & MEM_RCU;
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -4703,6 +4762,18 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
u32 btf_id;
int ret;
+ if (!env->allow_ptr_leaks) {
+ verbose(env,
+ "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
+ tname);
+ return -EPERM;
+ }
+ if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
+ verbose(env,
+ "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
+ tname);
+ return -EINVAL;
+ }
if (off < 0) {
verbose(env,
"R%d is ptr_%s invalid negative access: off=%d\n",
@@ -4773,14 +4844,16 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (flag & MEM_RCU) {
/* Mark value register as MEM_RCU only if it is protected by
- * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
+ * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
* itself can already indicate trustedness inside the rcu
- * read lock region. Also mark it as PTR_TRUSTED.
+ * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
+ * it could be null in some cases.
*/
- if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
+ if (!env->cur_state->active_rcu_lock ||
+ !(is_trusted_reg(reg) || is_rcu_reg(reg)))
flag &= ~MEM_RCU;
else
- flag |= PTR_TRUSTED;
+ flag |= PTR_MAYBE_NULL;
} else if (reg->type & MEM_RCU) {
/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
* with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
@@ -4823,9 +4896,9 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
tname = btf_name_by_offset(btf_vmlinux, t->name_off);
- if (!env->allow_ptr_to_map_access) {
+ if (!env->allow_ptr_leaks) {
verbose(env,
- "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
+ "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
tname);
return -EPERM;
}
@@ -5726,7 +5799,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
cur->active_lock.ptr = NULL;
cur->active_lock.id = 0;
- for (i = 0; i < fstate->acquired_refs; i++) {
+ for (i = fstate->acquired_refs - 1; i >= 0; i--) {
int err;
/* Complain on error because this reference state cannot
@@ -5822,6 +5895,119 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
return 0;
}
+/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
+ * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
+ *
+ * In both cases we deal with the first 8 bytes, but need to mark the next 8
+ * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
+ * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
+ *
+ * Mutability of bpf_dynptr is at two levels, one is at the level of struct
+ * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
+ * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
+ * mutate the view of the dynptr and also possibly destroy it. In the latter
+ * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
+ * memory that dynptr points to.
+ *
+ * The verifier will keep track of both levels of mutation (bpf_dynptr's in
+ * reg->type and the memory's in reg->dynptr.type), but there is no support for
+ * readonly dynptr view yet, hence only the first case is tracked and checked.
+ *
+ * This is consistent with how C applies the const modifier to a struct object,
+ * where the pointer itself inside bpf_dynptr becomes const but not what it
+ * points to.
+ *
+ * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
+ * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
+ */
+int process_dynptr_func(struct bpf_verifier_env *env, int regno,
+ enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+
+ /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
+ * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
+ */
+ if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
+ verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
+ return -EFAULT;
+ }
+ /* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
+ * check_func_arg_reg_off's logic. We only need to check offset
+ * alignment for PTR_TO_STACK.
+ */
+ if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
+ verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
+ return -EINVAL;
+ }
+ /* MEM_UNINIT - Points to memory that is an appropriate candidate for
+ * constructing a mutable bpf_dynptr object.
+ *
+ * Currently, this is only possible with PTR_TO_STACK
+ * pointing to a region of at least 16 bytes which doesn't
+ * contain an existing bpf_dynptr.
+ *
+ * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
+ * mutated or destroyed. However, the memory it points to
+ * may be mutated.
+ *
+ * None - Points to an initialized dynptr that can be mutated and
+ * destroyed, including mutation of the memory it points
+ * to.
+ */
+ if (arg_type & MEM_UNINIT) {
+ if (!is_dynptr_reg_valid_uninit(env, reg)) {
+ verbose(env, "Dynptr has to be an uninitialized dynptr\n");
+ return -EINVAL;
+ }
+
+ /* We only support one dynptr being uninitialized at the moment,
+ * which is sufficient for the helper functions we have right now.
+ */
+ if (meta->uninit_dynptr_regno) {
+ verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
+ return -EFAULT;
+ }
+
+ meta->uninit_dynptr_regno = regno;
+ } else /* MEM_RDONLY and None case from above */ {
+ /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
+ if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
+ verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
+ return -EINVAL;
+ }
+
+ if (!is_dynptr_reg_valid_init(env, reg)) {
+ verbose(env,
+ "Expected an initialized dynptr as arg #%d\n",
+ regno);
+ return -EINVAL;
+ }
+
+ /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
+ if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
+ const char *err_extra = "";
+
+ switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
+ case DYNPTR_TYPE_LOCAL:
+ err_extra = "local";
+ break;
+ case DYNPTR_TYPE_RINGBUF:
+ err_extra = "ringbuf";
+ break;
+ default:
+ err_extra = "<unknown>";
+ break;
+ }
+ verbose(env,
+ "Expected a dynptr of type %s as arg #%d\n",
+ err_extra, regno);
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
static bool arg_type_is_mem_size(enum bpf_arg_type type)
{
return type == ARG_CONST_SIZE ||
@@ -5945,7 +6131,7 @@ static const struct bpf_reg_types btf_ptr_types = {
.types = {
PTR_TO_BTF_ID,
PTR_TO_BTF_ID | PTR_TRUSTED,
- PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
+ PTR_TO_BTF_ID | MEM_RCU,
},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -5962,7 +6148,7 @@ static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } }
static const struct bpf_reg_types dynptr_types = {
.types = {
PTR_TO_STACK,
- PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL,
+ CONST_PTR_TO_DYNPTR,
}
};
@@ -6091,17 +6277,38 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno,
enum bpf_arg_type arg_type)
{
- enum bpf_reg_type type = reg->type;
- bool fixed_off_ok = false;
+ u32 type = reg->type;
- switch ((u32)type) {
- /* Pointer types where reg offset is explicitly allowed: */
- case PTR_TO_STACK:
- if (arg_type_is_dynptr(arg_type) && reg->off % BPF_REG_SIZE) {
- verbose(env, "cannot pass in dynptr at an offset\n");
+ /* When referenced register is passed to release function, its fixed
+ * offset must be 0.
+ *
+ * We will check arg_type_is_release reg has ref_obj_id when storing
+ * meta->release_regno.
+ */
+ if (arg_type_is_release(arg_type)) {
+ /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
+ * may not directly point to the object being released, but to
+ * dynptr pointing to such object, which might be at some offset
+ * on the stack. In that case, we simply fall back to the
+ * default handling.
+ */
+ if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
+ return 0;
+ /* Doing check_ptr_off_reg check for the offset will catch this
+ * because fixed_off_ok is false, but checking here allows us
+ * to give the user a better error message.
+ */
+ if (reg->off) {
+ verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
+ regno);
return -EINVAL;
}
- fallthrough;
+ return __check_ptr_off_reg(env, reg, regno, false);
+ }
+
+ switch (type) {
+ /* Pointer types where both fixed and variable offset is explicitly allowed: */
+ case PTR_TO_STACK:
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
case PTR_TO_MAP_KEY:
@@ -6112,47 +6319,38 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BUF:
case PTR_TO_BUF | MEM_RDONLY:
case SCALAR_VALUE:
- /* Some of the argument types nevertheless require a
- * zero register offset.
- */
- if (base_type(arg_type) != ARG_PTR_TO_RINGBUF_MEM)
- return 0;
- break;
+ return 0;
/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
* fixed offset.
*/
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
- case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
/* When referenced PTR_TO_BTF_ID is passed to release function,
- * it's fixed offset must be 0. In the other cases, fixed offset
- * can be non-zero.
+ * its fixed offset must be 0. In the other cases, fixed offset
+ * can be non-zero. This was already checked above. So pass
+ * fixed_off_ok as true to allow fixed offset for all other
+ * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
+ * still need to do checks instead of returning.
*/
- if (arg_type_is_release(arg_type) && reg->off) {
- verbose(env, "R%d must have zero offset when passed to release func\n",
- regno);
- return -EINVAL;
- }
- /* For arg is release pointer, fixed_off_ok must be false, but
- * we already checked and rejected reg->off != 0 above, so set
- * to true to allow fixed offset for all other cases.
- */
- fixed_off_ok = true;
- break;
+ return __check_ptr_off_reg(env, reg, regno, true);
default:
- break;
+ return __check_ptr_off_reg(env, reg, regno, false);
}
- return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
}
-static u32 stack_slot_get_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
+ int spi;
- return state->stack[spi].spilled_ptr.id;
+ if (reg->type == CONST_PTR_TO_DYNPTR)
+ return reg->ref_obj_id;
+
+ spi = get_spi(reg->off);
+ return state->stack[spi].spilled_ptr.ref_obj_id;
}
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
@@ -6217,11 +6415,22 @@ skip_type_check:
if (arg_type_is_release(arg_type)) {
if (arg_type_is_dynptr(arg_type)) {
struct bpf_func_state *state = func(env, reg);
- int spi = get_spi(reg->off);
+ int spi;
- if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
- !state->stack[spi].spilled_ptr.id) {
- verbose(env, "arg %d is an unacquired reference\n", regno);
+ /* Only dynptr created on stack can be released, thus
+ * the get_spi and stack state checks for spilled_ptr
+ * should only be done before process_dynptr_func for
+ * PTR_TO_STACK.
+ */
+ if (reg->type == PTR_TO_STACK) {
+ spi = get_spi(reg->off);
+ if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
+ !state->stack[spi].spilled_ptr.ref_obj_id) {
+ verbose(env, "arg %d is an unacquired reference\n", regno);
+ return -EINVAL;
+ }
+ } else {
+ verbose(env, "cannot release unowned const bpf_dynptr\n");
return -EINVAL;
}
} else if (!reg->ref_obj_id && !register_is_null(reg)) {
@@ -6318,19 +6527,22 @@ skip_type_check:
break;
case ARG_PTR_TO_SPIN_LOCK:
if (meta->func_id == BPF_FUNC_spin_lock) {
- if (process_spin_lock(env, regno, true))
- return -EACCES;
+ err = process_spin_lock(env, regno, true);
+ if (err)
+ return err;
} else if (meta->func_id == BPF_FUNC_spin_unlock) {
- if (process_spin_lock(env, regno, false))
- return -EACCES;
+ err = process_spin_lock(env, regno, false);
+ if (err)
+ return err;
} else {
verbose(env, "verifier internal error\n");
return -EFAULT;
}
break;
case ARG_PTR_TO_TIMER:
- if (process_timer_func(env, regno, meta))
- return -EACCES;
+ err = process_timer_func(env, regno, meta);
+ if (err)
+ return err;
break;
case ARG_PTR_TO_FUNC:
meta->subprogno = reg->subprogno;
@@ -6353,52 +6565,9 @@ skip_type_check:
err = check_mem_size_reg(env, reg, regno, true, meta);
break;
case ARG_PTR_TO_DYNPTR:
- /* We only need to check for initialized / uninitialized helper
- * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the
- * assumption is that if it is, that a helper function
- * initialized the dynptr on behalf of the BPF program.
- */
- if (base_type(reg->type) == PTR_TO_DYNPTR)
- break;
- if (arg_type & MEM_UNINIT) {
- if (!is_dynptr_reg_valid_uninit(env, reg)) {
- verbose(env, "Dynptr has to be an uninitialized dynptr\n");
- return -EINVAL;
- }
-
- /* We only support one dynptr being uninitialized at the moment,
- * which is sufficient for the helper functions we have right now.
- */
- if (meta->uninit_dynptr_regno) {
- verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
- return -EFAULT;
- }
-
- meta->uninit_dynptr_regno = regno;
- } else if (!is_dynptr_reg_valid_init(env, reg)) {
- verbose(env,
- "Expected an initialized dynptr as arg #%d\n",
- arg + 1);
- return -EINVAL;
- } else if (!is_dynptr_type_expected(env, reg, arg_type)) {
- const char *err_extra = "";
-
- switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
- case DYNPTR_TYPE_LOCAL:
- err_extra = "local";
- break;
- case DYNPTR_TYPE_RINGBUF:
- err_extra = "ringbuf";
- break;
- default:
- err_extra = "<unknown>";
- break;
- }
- verbose(env,
- "Expected a dynptr of type %s as arg #%d\n",
- err_extra, arg + 1);
- return -EINVAL;
- }
+ err = process_dynptr_func(env, regno, arg_type, meta);
+ if (err)
+ return err;
break;
case ARG_CONST_ALLOC_SIZE_OR_ZERO:
if (!tnum_is_const(reg->var_off)) {
@@ -6465,8 +6634,9 @@ skip_type_check:
break;
}
case ARG_PTR_TO_KPTR:
- if (process_kptr_func(env, regno, meta))
- return -EACCES;
+ err = process_kptr_func(env, regno, meta);
+ if (err)
+ return err;
break;
}
@@ -7234,11 +7404,10 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
{
/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
* callback_ctx, u64 flags);
- * callback_fn(struct bpf_dynptr_t* dynptr, void *callback_ctx);
+ * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
*/
__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
- callee->regs[BPF_REG_1].type = PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL;
- __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+ mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
/* unused */
@@ -7632,7 +7801,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env);
+ /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
+ * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
+ * is safe to do directly.
+ */
if (meta.uninit_dynptr_regno) {
+ if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
+ verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
+ return -EFAULT;
+ }
/* we write BPF_DW bits (8 bytes) at a time */
for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
@@ -7650,15 +7827,24 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (meta.release_regno) {
err = -EINVAL;
- if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1]))
+ /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
+ * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
+ * is safe to do directly.
+ */
+ if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
+ if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
+ verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
+ return -EFAULT;
+ }
err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
- else if (meta.ref_obj_id)
+ } else if (meta.ref_obj_id) {
err = release_reference(env, meta.ref_obj_id);
- /* meta.ref_obj_id can only be 0 if register that is meant to be
- * released is NULL, which must be > R0.
- */
- else if (register_is_null(&regs[meta.release_regno]))
+ } else if (register_is_null(&regs[meta.release_regno])) {
+ /* meta.ref_obj_id can only be 0 if register that is meant to be
+ * released is NULL, which must be > R0.
+ */
err = 0;
+ }
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
func_id_name(func_id), func_id);
@@ -7732,11 +7918,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EFAULT;
}
- if (base_type(reg->type) != PTR_TO_DYNPTR)
- /* Find the id of the dynptr we're
- * tracking the reference of
- */
- meta.ref_obj_id = stack_slot_get_id(env, reg);
+ meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
break;
}
}
@@ -8026,6 +8208,11 @@ static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_DESTRUCTIVE;
}
+static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_RCU;
+}
+
static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
{
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
@@ -8710,13 +8897,20 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
switch (kf_arg_type) {
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
case KF_ARG_PTR_TO_BTF_ID:
- if (!is_kfunc_trusted_args(meta))
+ if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
break;
if (!is_trusted_reg(reg)) {
- verbose(env, "R%d must be referenced or trusted\n", regno);
- return -EINVAL;
+ if (!is_kfunc_rcu(meta)) {
+ verbose(env, "R%d must be referenced or trusted\n", regno);
+ return -EINVAL;
+ }
+ if (!is_rcu_reg(reg)) {
+ verbose(env, "R%d must be a rcu pointer\n", regno);
+ return -EINVAL;
+ }
}
+
fallthrough;
case KF_ARG_PTR_TO_CTX:
/* Trusted arguments have the same offset checks as release arguments */
@@ -8780,22 +8974,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return ret;
break;
case KF_ARG_PTR_TO_DYNPTR:
- if (reg->type != PTR_TO_STACK) {
- verbose(env, "arg#%d expected pointer to stack\n", i);
+ if (reg->type != PTR_TO_STACK &&
+ reg->type != CONST_PTR_TO_DYNPTR) {
+ verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
return -EINVAL;
}
- if (!is_dynptr_reg_valid_init(env, reg)) {
- verbose(env, "arg#%d pointer type %s %s must be valid and initialized\n",
- i, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
-
- if (!is_dynptr_type_expected(env, reg, ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL)) {
- verbose(env, "arg#%d pointer type %s %s points to unsupported dynamic pointer type\n",
- i, btf_type_str(ref_t), ref_tname);
- return -EINVAL;
- }
+ ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL);
+ if (ret < 0)
+ return ret;
break;
case KF_ARG_PTR_TO_LIST_HEAD:
if (reg->type != PTR_TO_MAP_VALUE &&
@@ -8827,7 +9014,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
- bpf_type_has_unsafe_modifiers(reg->type)) &&
+ (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
!reg2btf_ids[base_type(reg->type)]) {
verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
verbose(env, "expected %s or socket\n",
@@ -8942,7 +9129,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
} else if (rcu_unlock) {
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
if (reg->type & MEM_RCU) {
- reg->type &= ~(MEM_RCU | PTR_TRUSTED);
+ reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
reg->type |= PTR_UNTRUSTED;
}
}));
@@ -11282,7 +11469,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
bool is_null)
{
if (type_may_be_null(reg->type) && reg->id == id &&
- !WARN_ON_ONCE(!reg->id)) {
+ (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
/* Old offset (both fixed and variable parts) should have been
* known-zero, because we don't allow pointer arithmetic on
* pointers that might be NULL. If we see this happening, don't
@@ -12104,11 +12291,16 @@ static struct bpf_verifier_state_list **explored_state(
return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
}
-static void init_explored_state(struct bpf_verifier_env *env, int idx)
+static void mark_prune_point(struct bpf_verifier_env *env, int idx)
{
env->insn_aux_data[idx].prune_point = true;
}
+static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
+{
+ return env->insn_aux_data[insn_idx].prune_point;
+}
+
enum {
DONE_EXPLORING = 0,
KEEP_EXPLORING = 1,
@@ -12137,9 +12329,11 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return -EINVAL;
}
- if (e == BRANCH)
+ if (e == BRANCH) {
/* mark branch target for state pruning */
- init_explored_state(env, w);
+ mark_prune_point(env, w);
+ mark_jmp_point(env, w);
+ }
if (insn_state[w] == 0) {
/* tree-edge */
@@ -12166,8 +12360,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return DONE_EXPLORING;
}
-static int visit_func_call_insn(int t, int insn_cnt,
- struct bpf_insn *insns,
+static int visit_func_call_insn(int t, struct bpf_insn *insns,
struct bpf_verifier_env *env,
bool visit_callee)
{
@@ -12177,10 +12370,12 @@ static int visit_func_call_insn(int t, int insn_cnt,
if (ret)
return ret;
- if (t + 1 < insn_cnt)
- init_explored_state(env, t + 1);
+ mark_prune_point(env, t + 1);
+ /* when we exit from subprog, we need to record non-linear history */
+ mark_jmp_point(env, t + 1);
+
if (visit_callee) {
- init_explored_state(env, t);
+ mark_prune_point(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
/* It's ok to allow recursion from CFG point of
* view. __check_func_call() will do the actual
@@ -12196,13 +12391,13 @@ static int visit_func_call_insn(int t, int insn_cnt,
* DONE_EXPLORING - the instruction was fully explored
* KEEP_EXPLORING - there is still work to be done before it is fully explored
*/
-static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
+static int visit_insn(int t, struct bpf_verifier_env *env)
{
struct bpf_insn *insns = env->prog->insnsi;
int ret;
if (bpf_pseudo_func(insns + t))
- return visit_func_call_insn(t, insn_cnt, insns, env, true);
+ return visit_func_call_insn(t, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insns[t].code) != BPF_JMP &&
@@ -12215,13 +12410,13 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
case BPF_CALL:
if (insns[t].imm == BPF_FUNC_timer_set_callback)
- /* Mark this call insn to trigger is_state_visited() check
- * before call itself is processed by __check_func_call().
- * Otherwise new async state will be pushed for further
- * exploration.
+ /* Mark this call insn as a prune point to trigger
+ * is_state_visited() check before call itself is
+ * processed by __check_func_call(). Otherwise new
+ * async state will be pushed for further exploration.
*/
- init_explored_state(env, t);
- return visit_func_call_insn(t, insn_cnt, insns, env,
+ mark_prune_point(env, t);
+ return visit_func_call_insn(t, insns, env,
insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
@@ -12234,22 +12429,15 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
if (ret)
return ret;
- /* unconditional jmp is not a good pruning point,
- * but it's marked, since backtracking needs
- * to record jmp history in is_state_visited().
- */
- init_explored_state(env, t + insns[t].off + 1);
- /* tell verifier to check for equivalent states
- * after every call and jump
- */
- if (t + 1 < insn_cnt)
- init_explored_state(env, t + 1);
+ mark_prune_point(env, t + insns[t].off + 1);
+ mark_jmp_point(env, t + insns[t].off + 1);
return ret;
default:
/* conditional jump with two edges */
- init_explored_state(env, t);
+ mark_prune_point(env, t);
+
ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
if (ret)
return ret;
@@ -12285,7 +12473,7 @@ static int check_cfg(struct bpf_verifier_env *env)
while (env->cfg.cur_stack > 0) {
int t = insn_stack[env->cfg.cur_stack - 1];
- ret = visit_insn(t, insn_cnt, env);
+ ret = visit_insn(t, env);
switch (ret) {
case DONE_EXPLORING:
insn_state[t] = EXPLORED;
@@ -12876,15 +13064,6 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
- if (rold->type == PTR_TO_STACK)
- /* two stack pointers are equal only if they're pointing to
- * the same stack frame, since fp-8 in foo != fp-8 in bar
- */
- return equal && rold->frameno == rcur->frameno;
-
- if (equal)
- return true;
-
if (rold->type == NOT_INIT)
/* explored state can't have used this */
return true;
@@ -12892,6 +13071,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
return false;
switch (base_type(rold->type)) {
case SCALAR_VALUE:
+ if (equal)
+ return true;
if (env->explore_alu_limits)
return false;
if (rcur->type == SCALAR_VALUE) {
@@ -12938,7 +13119,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
*/
return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
range_within(rold, rcur) &&
- tnum_in(rold->var_off, rcur->var_off);
+ tnum_in(rold->var_off, rcur->var_off) &&
+ check_ids(rold->id, rcur->id, idmap);
case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
if (rcur->type != rold->type)
@@ -12962,20 +13144,14 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
/* new val must satisfy old val knowledge */
return range_within(rold, rcur) &&
tnum_in(rold->var_off, rcur->var_off);
- case PTR_TO_CTX:
- case CONST_PTR_TO_MAP:
- case PTR_TO_PACKET_END:
- case PTR_TO_FLOW_KEYS:
- case PTR_TO_SOCKET:
- case PTR_TO_SOCK_COMMON:
- case PTR_TO_TCP_SOCK:
- case PTR_TO_XDP_SOCK:
- /* Only valid matches are exact, which memcmp() above
- * would have accepted
+ case PTR_TO_STACK:
+ /* two stack pointers are equal only if they're pointing to
+ * the same stack frame, since fp-8 in foo != fp-8 in bar
*/
+ return equal && rold->frameno == rcur->frameno;
default:
- /* Don't know what's going on, just say it's not safe */
- return false;
+ /* Only valid matches are exact, which memcmp() above determined */
+ return equal;
}
/* Shouldn't get here; if we do, say it's not safe */
@@ -13085,7 +13261,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
{
int i;
- memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
for (i = 0; i < MAX_BPF_REG; i++)
if (!regsafe(env, &old->regs[i], &cur->regs[i],
env->idmap_scratch))
@@ -13109,14 +13284,25 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->curframe != cur->curframe)
return false;
+ memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+
/* Verification state from speculative execution simulation
* must never prune a non-speculative execution one.
*/
if (old->speculative && !cur->speculative)
return false;
- if (old->active_lock.ptr != cur->active_lock.ptr ||
- old->active_lock.id != cur->active_lock.id)
+ if (old->active_lock.ptr != cur->active_lock.ptr)
+ return false;
+
+ /* Old and cur active_lock's have to be either both present
+ * or both absent.
+ */
+ if (!!old->active_lock.id != !!cur->active_lock.id)
+ return false;
+
+ if (old->active_lock.id &&
+ !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
return false;
if (old->active_rcu_lock != cur->active_rcu_lock)
@@ -13283,13 +13469,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
int i, j, err, states_cnt = 0;
bool add_new_state = env->test_state_freq ? true : false;
- cur->last_insn_idx = env->prev_insn_idx;
- if (!env->insn_aux_data[insn_idx].prune_point)
- /* this 'insn_idx' instruction wasn't marked, so we will not
- * be doing state search here
- */
- return 0;
-
/* bpf progs typically have pruning point every 4 instructions
* http://vger.kernel.org/bpfconf2019.html#session-1
* Do not add new state for future pruning if the verifier hasn't seen
@@ -13424,10 +13603,10 @@ next:
env->max_states_per_insn = states_cnt;
if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
- return push_jmp_history(env, cur);
+ return 0;
if (!add_new_state)
- return push_jmp_history(env, cur);
+ return 0;
/* There were no equivalent states, remember the current one.
* Technically the current state is not proven to be safe yet,
@@ -13567,21 +13746,31 @@ static int do_check(struct bpf_verifier_env *env)
return -E2BIG;
}
- err = is_state_visited(env, env->insn_idx);
- if (err < 0)
- return err;
- if (err == 1) {
- /* found equivalent state, can prune the search */
- if (env->log.level & BPF_LOG_LEVEL) {
- if (do_print_state)
- verbose(env, "\nfrom %d to %d%s: safe\n",
- env->prev_insn_idx, env->insn_idx,
- env->cur_state->speculative ?
- " (speculative execution)" : "");
- else
- verbose(env, "%d: safe\n", env->insn_idx);
+ state->last_insn_idx = env->prev_insn_idx;
+
+ if (is_prune_point(env, env->insn_idx)) {
+ err = is_state_visited(env, env->insn_idx);
+ if (err < 0)
+ return err;
+ if (err == 1) {
+ /* found equivalent state, can prune the search */
+ if (env->log.level & BPF_LOG_LEVEL) {
+ if (do_print_state)
+ verbose(env, "\nfrom %d to %d%s: safe\n",
+ env->prev_insn_idx, env->insn_idx,
+ env->cur_state->speculative ?
+ " (speculative execution)" : "");
+ else
+ verbose(env, "%d: safe\n", env->insn_idx);
+ }
+ goto process_bpf_exit;
}
- goto process_bpf_exit;
+ }
+
+ if (is_jmp_point(env, env->insn_idx)) {
+ err = push_jmp_history(env, state);
+ if (err)
+ return err;
}
if (signal_pending(current))
@@ -14123,10 +14312,11 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_SK_STORAGE:
case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
break;
default:
verbose(env,
- "Sleepable programs can only use array, hash, and ringbuf maps\n");
+ "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
return -EINVAL;
}
@@ -14782,6 +14972,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
continue;
+ /* Zero-extension is done by the caller. */
+ if (bpf_pseudo_kfunc_call(&insn))
+ continue;
+
if (WARN_ON(load_reg == -1)) {
verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
return -EFAULT;
@@ -15292,7 +15486,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
/* insn->imm has the btf func_id. Replace it with
- * an address (relative to __bpf_base_call).
+ * an address (relative to __bpf_call_base).
*/
desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
if (!desc) {
@@ -16464,12 +16658,22 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
ret = -EINVAL;
switch (prog->type) {
case BPF_PROG_TYPE_TRACING:
- /* fentry/fexit/fmod_ret progs can be sleepable only if they are
+
+ /* fentry/fexit/fmod_ret progs can be sleepable if they are
* attached to ALLOW_ERROR_INJECTION and are not in denylist.
*/
if (!check_non_sleepable_error_inject(btf_id) &&
within_error_injection_list(addr))
ret = 0;
+ /* fentry/fexit/fmod_ret progs can also be sleepable if they are
+ * in the fmodret id set with the KF_SLEEPABLE flag.
+ */
+ else {
+ u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
+
+ if (flags && (*flags & KF_SLEEPABLE))
+ ret = 0;
+ }
break;
case BPF_PROG_TYPE_LSM:
/* LSM progs check that they are attached to bpf_lsm_*() funcs.
@@ -16490,7 +16694,10 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
bpf_log(log, "can't modify return codes of BPF programs\n");
return -EINVAL;
}
- ret = check_attach_modify_return(addr, tname);
+ ret = -EINVAL;
+ if (btf_kfunc_is_modify_return(btf, btf_id) ||
+ !check_attach_modify_return(addr, tname))
+ ret = 0;
if (ret) {
bpf_log(log, "%s() is not modifiable\n", tname);
return ret;
@@ -16679,7 +16886,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->allow_ptr_leaks = bpf_allow_ptr_leaks();
env->allow_uninit_stack = bpf_allow_uninit_stack();
- env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();