summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2022-11-24 12:27:13 -0800
committerAlexei Starovoitov <ast@kernel.org>2022-11-24 12:54:34 -0800
commit6099754a1493467d2db15a20756e32e9a69c2cec (patch)
treebc22391f45fa9560aab584e038cbd0d65e7b15be /kernel
parentf471748b7fe5ab7ec6de4cbadffabfa7bb5b6240 (diff)
parent48671232fcb81b76be13c11b0df7089b16baea57 (diff)
downloadlinux-6099754a1493467d2db15a20756e32e9a69c2cec.tar.bz2
Merge branch 'bpf: Add bpf_rcu_read_lock() support'
Yonghong Song says: ==================== Currently, without rcu attribute info in BTF, the verifier treats rcu tagged pointer as a normal pointer. This might be a problem for sleepable program where rcu_read_lock()/unlock() is not available. For example, for a sleepable fentry program, if rcu protected memory access is interleaved with a sleepable helper/kfunc, it is possible the memory access after the sleepable helper/kfunc might be invalid since the object might have been freed then. Even without a sleepable helper/kfunc, without rcu_read_lock() protection, it is possible that the rcu protected object might be release in the middle of bpf program execution which may cause incorrect result. To prevent above cases, enable btf_type_tag("rcu") attributes, introduce new bpf_rcu_read_lock/unlock() kfuncs and add verifier support. In the rest of patch set, Patch 1 enabled btf_type_tag for __rcu attribute. Patche 2 added might_sleep in bpf_func_proto. Patch 3 added new bpf_rcu_read_lock/unlock() kfuncs and verifier support. Patch 4 added some tests for these two new kfuncs. Changelogs: v9 -> v10: . if no rcu tag support in vmlinux btf, using bpf_rcu_read_lock/unlock() will cause verification error. . at bpf_rcu_read_unlock(), invalidate rcu ptr to PTR_UNTRUSTED instead of SCALAR_VALUE. . a few other comment changes and other minor changes. v8 -> v9: . remove sleepable prog check for ld_abs/ind checking in rcu read lock region. . fix a test failure with gcc-compiled kernel. . a couple of other minor fixes. v7 -> v8: . add might_sleep in bpf_func_proto so we can easily identify whether a helper is sleepable or not. . do not enforce rcu rules for sleepable, e.g., rcu dereference must be in a bpf_rcu_read_lock region. This is to keep old code working fine. . Mark 'b' in 'b = a->b' (b is tagged with __rcu) as MEM_RCU only if 'b = a->b' in rcu read region and 'a' is trusted. This adds safety guarantee for 'b' inside the rcu read region. v6 -> v7: . rebase on top of bpf-next. . remove the patch which enables sleepable program using cgrp_local_storage map. This is orthogonal to this patch set and will be addressed separately. . mark the rcu pointer dereference result as UNTRUSTED if inside a bpf_rcu_read_lock() region. v5 -> v6: . fix selftest prog miss_unlock which tested nested locking. . add comments in selftest prog cgrp_succ to explain how to handle nested memory access after rcu memory load. v4 -> v5: . add new test to aarch64 deny list. v3 -> v4: . fix selftest failures when built with gcc. gcc doesn't support btf_type_tag yet and some tests relies on that. skip these tests if vmlinux BTF does not have btf_type_tag("rcu"). v2 -> v3: . went back to MEM_RCU approach with invalidate rcu ptr registers at bpf_rcu_read_unlock() place. . remove KF_RCU_LOCK/UNLOCK flag and compare btf_id at verification time instead. v1 -> v2: . use kfunc instead of helper for bpf_rcu_read_lock/unlock. . not use MEM_RCU bpf_type_flag, instead use active_rcu_lock in reg state to identify rcu ptr's. . Add more self tests. . add new test to s390x deny list. ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/bpf_lsm.c6
-rw-r--r--kernel/bpf/btf.c3
-rw-r--r--kernel/bpf/helpers.c14
-rw-r--r--kernel/bpf/verifier.c167
-rw-r--r--kernel/trace/bpf_trace.c4
5 files changed, 161 insertions, 33 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index d6c9b3705f24..ae0267f150b5 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -151,6 +151,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_inode_hash_btf_ids, struct, inode)
static const struct bpf_func_proto bpf_ima_inode_hash_proto = {
.func = bpf_ima_inode_hash,
.gpl_only = false,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_inode_hash_btf_ids[0],
@@ -169,6 +170,7 @@ BTF_ID_LIST_SINGLE(bpf_ima_file_hash_btf_ids, struct, file)
static const struct bpf_func_proto bpf_ima_file_hash_proto = {
.func = bpf_ima_file_hash,
.gpl_only = false,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_ima_file_hash_btf_ids[0],
@@ -221,9 +223,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:
- return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL;
+ return &bpf_ima_inode_hash_proto;
case BPF_FUNC_ima_file_hash:
- return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
+ return &bpf_ima_file_hash_proto;
case BPF_FUNC_get_attach_cookie:
return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
#ifdef CONFIG_NET
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 9dbfda2b5c6c..bd3369100239 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6238,6 +6238,9 @@ error:
/* check __percpu tag */
if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU;
+ /* check __rcu tag */
+ if (strcmp(tag_value, "rcu") == 0)
+ tmp_flag = MEM_RCU;
}
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2299bb0d89e5..a5a511430f2a 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -661,6 +661,7 @@ BPF_CALL_3(bpf_copy_from_user, void *, dst, u32, size,
const struct bpf_func_proto bpf_copy_from_user_proto = {
.func = bpf_copy_from_user,
.gpl_only = false,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
@@ -691,6 +692,7 @@ BPF_CALL_5(bpf_copy_from_user_task, void *, dst, u32, size,
const struct bpf_func_proto bpf_copy_from_user_task_proto = {
.func = bpf_copy_from_user_task,
.gpl_only = true,
+ .might_sleep = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
@@ -1988,6 +1990,16 @@ void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
return obj__ign;
}
+void bpf_rcu_read_lock(void)
+{
+ rcu_read_lock();
+}
+
+void bpf_rcu_read_unlock(void)
+{
+ rcu_read_unlock();
+}
+
__diag_pop();
BTF_SET8_START(generic_btf_ids)
@@ -2029,6 +2041,8 @@ BTF_ID(func, bpf_cgroup_release)
BTF_SET8_START(common_btf_ids)
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
BTF_ID_FLAGS(func, bpf_rdonly_cast)
+BTF_ID_FLAGS(func, bpf_rcu_read_lock)
+BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
BTF_SET8_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9528a066cfa5..f4500479f1c2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -527,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_user_ringbuf_drain;
}
+static bool is_storage_get_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_sk_storage_get ||
+ func_id == BPF_FUNC_inode_storage_get ||
+ func_id == BPF_FUNC_task_storage_get ||
+ func_id == BPF_FUNC_cgrp_storage_get;
+}
+
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
const struct bpf_map *map)
{
@@ -589,11 +597,12 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
strncpy(postfix, "_or_null", 16);
}
- snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s",
+ snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
type & MEM_RDONLY ? "rdonly_" : "",
type & MEM_RINGBUF ? "ringbuf_" : "",
type & MEM_USER ? "user_" : "",
type & MEM_PERCPU ? "percpu_" : "",
+ type & MEM_RCU ? "rcu_" : "",
type & PTR_UNTRUSTED ? "untrusted_" : "",
type & PTR_TRUSTED ? "trusted_" : ""
);
@@ -1220,6 +1229,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
dst_state->frame[i] = NULL;
}
dst_state->speculative = src->speculative;
+ dst_state->active_rcu_lock = src->active_rcu_lock;
dst_state->curframe = src->curframe;
dst_state->active_lock.ptr = src->active_lock.ptr;
dst_state->active_lock.id = src->active_lock.id;
@@ -4258,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
return reg->type == PTR_TO_FLOW_KEYS;
}
+static bool is_trusted_reg(const struct bpf_reg_state *reg)
+{
+ /* A referenced register is always trusted. */
+ if (reg->ref_obj_id)
+ return true;
+
+ /* If a register is not referenced, it is trusted if it has the
+ * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
+ * other type modifiers may be safe, but we elect to take an opt-in
+ * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
+ * not.
+ *
+ * Eventually, we should make PTR_TRUSTED the single source of truth
+ * for whether a register is trusted.
+ */
+ return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
+ !bpf_type_has_unsafe_modifiers(reg->type);
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -4737,9 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
if (type_flag(reg->type) & PTR_UNTRUSTED)
flag |= PTR_UNTRUSTED;
- /* Any pointer obtained from walking a trusted pointer is no longer trusted. */
+ /* By default any pointer obtained from walking a trusted pointer is
+ * no longer trusted except the rcu case below.
+ */
flag &= ~PTR_TRUSTED;
+ if (flag & MEM_RCU) {
+ /* Mark value register as MEM_RCU only if it is protected by
+ * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
+ * itself can already indicate trustedness inside the rcu
+ * read lock region. Also mark it as PTR_TRUSTED.
+ */
+ if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
+ flag &= ~MEM_RCU;
+ else
+ flag |= PTR_TRUSTED;
+ } else if (reg->type & MEM_RCU) {
+ /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
+ * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
+ */
+ flag |= PTR_UNTRUSTED;
+ }
+
if (atype == BPF_READ && value_regno >= 0)
mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
@@ -5897,6 +5945,7 @@ static const struct bpf_reg_types btf_ptr_types = {
.types = {
PTR_TO_BTF_ID,
PTR_TO_BTF_ID | PTR_TRUSTED,
+ PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -6075,6 +6124,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* it's fixed offset must be 0. In the other cases, fixed offset
@@ -7516,6 +7566,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return -EINVAL;
}
+ if (!env->prog->aux->sleepable && fn->might_sleep) {
+ verbose(env, "helper call might sleep in a non-sleepable prog\n");
+ return -EINVAL;
+ }
+
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(fn->func);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
@@ -7534,6 +7589,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
+ if (env->cur_state->active_rcu_lock) {
+ if (fn->might_sleep) {
+ verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
+ func_id_name(func_id), func_id);
+ return -EINVAL;
+ }
+
+ if (env->prog->aux->sleepable && is_storage_get_function(func_id))
+ env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
+ }
+
meta.func_id = func_id;
/* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
@@ -7961,25 +8027,6 @@ static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
}
-static bool is_trusted_reg(const struct bpf_reg_state *reg)
-{
- /* A referenced register is always trusted. */
- if (reg->ref_obj_id)
- return true;
-
- /* If a register is not referenced, it is trusted if it has either the
- * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
- * other type modifiers may be safe, but we elect to take an opt-in
- * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
- * not.
- *
- * Eventually, we should make PTR_TRUSTED the single source of truth
- * for whether a register is trusted.
- */
- return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
- !bpf_type_has_unsafe_modifiers(reg->type);
-}
-
static bool __kfunc_param_match_suffix(const struct btf *btf,
const struct btf_param *arg,
const char *suffix)
@@ -8158,6 +8205,8 @@ enum special_kfunc_type {
KF_bpf_list_pop_back,
KF_bpf_cast_to_kern_ctx,
KF_bpf_rdonly_cast,
+ KF_bpf_rcu_read_lock,
+ KF_bpf_rcu_read_unlock,
};
BTF_SET_START(special_kfunc_set)
@@ -8180,6 +8229,18 @@ BTF_ID(func, bpf_list_pop_front)
BTF_ID(func, bpf_list_pop_back)
BTF_ID(func, bpf_cast_to_kern_ctx)
BTF_ID(func, bpf_rdonly_cast)
+BTF_ID(func, bpf_rcu_read_lock)
+BTF_ID(func, bpf_rcu_read_unlock)
+
+static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
+}
+
+static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
+}
static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
@@ -8812,6 +8873,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
const struct btf_type *t, *func, *func_proto, *ptr_type;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
+ bool sleepable, rcu_lock, rcu_unlock;
struct bpf_kfunc_call_arg_meta meta;
u32 i, nargs, func_id, ptr_type_id;
int err, insn_idx = *insn_idx_p;
@@ -8853,11 +8915,45 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EACCES;
}
- if (is_kfunc_sleepable(&meta) && !env->prog->aux->sleepable) {
+ sleepable = is_kfunc_sleepable(&meta);
+ if (sleepable && !env->prog->aux->sleepable) {
verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
return -EACCES;
}
+ rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
+ rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
+ if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
+ verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
+ return -EACCES;
+ }
+
+ if (env->cur_state->active_rcu_lock) {
+ struct bpf_func_state *state;
+ struct bpf_reg_state *reg;
+
+ if (rcu_lock) {
+ verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
+ return -EINVAL;
+ } else if (rcu_unlock) {
+ bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
+ if (reg->type & MEM_RCU) {
+ reg->type &= ~(MEM_RCU | PTR_TRUSTED);
+ reg->type |= PTR_UNTRUSTED;
+ }
+ }));
+ env->cur_state->active_rcu_lock = false;
+ } else if (sleepable) {
+ verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
+ return -EACCES;
+ }
+ } else if (rcu_lock) {
+ env->cur_state->active_rcu_lock = true;
+ } else if (rcu_unlock) {
+ verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
+ return -EINVAL;
+ }
+
/* Check the arguments */
err = check_kfunc_args(env, &meta);
if (err < 0)
@@ -11749,6 +11845,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
+ if (env->cur_state->active_rcu_lock) {
+ verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
+ return -EINVAL;
+ }
+
if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -13014,6 +13115,9 @@ static bool states_equal(struct bpf_verifier_env *env,
old->active_lock.id != cur->active_lock.id)
return false;
+ if (old->active_rcu_lock != cur->active_rcu_lock)
+ return false;
+
/* for states to be equal callsites have to be the same
* and all frame states need to be equivalent
*/
@@ -13701,6 +13805,11 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}
+ if (env->cur_state->active_rcu_lock) {
+ verbose(env, "bpf_rcu_read_unlock is missing\n");
+ return -EINVAL;
+ }
+
/* We must do check_reference_leak here before
* prepare_func_exit to handle the case when
* state->curframe > 0, it may be a callback
@@ -15489,14 +15598,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto patch_call_imm;
}
- if (insn->imm == BPF_FUNC_task_storage_get ||
- insn->imm == BPF_FUNC_sk_storage_get ||
- insn->imm == BPF_FUNC_inode_storage_get ||
- insn->imm == BPF_FUNC_cgrp_storage_get) {
- if (env->prog->aux->sleepable)
- insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
- else
+ if (is_storage_get_function(insn->imm)) {
+ if (!env->prog->aux->sleepable ||
+ env->insn_aux_data[i + delta].storage_get_func_atomic)
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
+ else
+ insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
insn_buf[1] = *insn;
cnt = 2;
@@ -16575,6 +16682,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
env->bypass_spec_v1 = bpf_bypass_spec_v1();
env->bypass_spec_v4 = bpf_bypass_spec_v4();
env->bpf_capable = bpf_capable();
+ env->rcu_tag_supported = btf_vmlinux &&
+ btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 5b9008bc597b..3bbd3f0c810c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1485,9 +1485,9 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_task_stack:
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
- return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
+ return &bpf_copy_from_user_proto;
case BPF_FUNC_copy_from_user_task:
- return prog->aux->sleepable ? &bpf_copy_from_user_task_proto : NULL;
+ return &bpf_copy_from_user_task_proto;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
case BPF_FUNC_per_cpu_ptr: