summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/arraymap.c40
-rw-r--r--kernel/bpf/bpf_lsm.c8
-rw-r--r--kernel/bpf/bpf_struct_ops.c3
-rw-r--r--kernel/bpf/btf.c126
-rw-r--r--kernel/bpf/core.c100
-rw-r--r--kernel/bpf/devmap.c2
-rw-r--r--kernel/bpf/hashtab.c6
-rw-r--r--kernel/bpf/local_storage.c2
-rw-r--r--kernel/bpf/lpm_trie.c2
-rw-r--r--kernel/bpf/preload/iterators/Makefile10
-rw-r--r--kernel/bpf/syscall.c36
-rw-r--r--kernel/bpf/trampoline.c163
-rw-r--r--kernel/bpf/verifier.c89
-rw-r--r--kernel/kallsyms.c91
-rw-r--r--kernel/trace/ftrace.c328
15 files changed, 724 insertions, 282 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index fe40d3b9458f..d3e734bf8056 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -70,10 +70,8 @@ int array_map_alloc_check(union bpf_attr *attr)
attr->map_flags & BPF_F_PRESERVE_ELEMS)
return -EINVAL;
- if (attr->value_size > KMALLOC_MAX_SIZE)
- /* if value_size is bigger, the user space won't be able to
- * access the elements.
- */
+ /* avoid overflow on round_up(map->value_size) */
+ if (attr->value_size > INT_MAX)
return -E2BIG;
return 0;
@@ -156,6 +154,11 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
return &array->map;
}
+static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
+{
+ return array->value + (u64)array->elem_size * index;
+}
+
/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
@@ -165,7 +168,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
if (unlikely(index >= array->map.max_entries))
return NULL;
- return array->value + array->elem_size * (index & array->index_mask);
+ return array->value + (u64)array->elem_size * (index & array->index_mask);
}
static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
@@ -203,7 +206,7 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
- u32 elem_size = round_up(map->value_size, 8);
+ u32 elem_size = array->elem_size;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
@@ -272,7 +275,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
*/
- size = round_up(map->value_size, 8);
+ size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
@@ -339,7 +342,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
value, map->value_size);
} else {
val = array->value +
- array->elem_size * (index & array->index_mask);
+ (u64)array->elem_size * (index & array->index_mask);
if (map_flags & BPF_F_LOCK)
copy_map_value_locked(map, val, value, false);
else
@@ -376,7 +379,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
* returned or zeros which were zero-filled by percpu_alloc,
* so no kernel data leaks possible
*/
- size = round_up(map->value_size, 8);
+ size = array->elem_size;
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
@@ -408,8 +411,7 @@ static void array_map_free_timers(struct bpf_map *map)
return;
for (i = 0; i < array->map.max_entries; i++)
- bpf_timer_cancel_and_free(array->value + array->elem_size * i +
- map->timer_off);
+ bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
@@ -420,7 +422,7 @@ static void array_map_free(struct bpf_map *map)
if (map_value_has_kptrs(map)) {
for (i = 0; i < array->map.max_entries; i++)
- bpf_map_free_kptrs(map, array->value + array->elem_size * i);
+ bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
bpf_map_free_kptr_off_tab(map);
}
@@ -556,7 +558,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
- return array->value + array->elem_size * index;
+ return array_map_elem_ptr(array, index);
}
static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
@@ -575,7 +577,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
- return array->value + array->elem_size * index;
+ return array_map_elem_ptr(array, index);
}
static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
@@ -583,6 +585,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_seq_array_map_info *info = seq->private;
struct bpf_iter__bpf_map_elem ctx = {};
struct bpf_map *map = info->map;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
@@ -603,7 +606,7 @@ static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
ctx.value = v;
} else {
pptr = v;
- size = round_up(map->value_size, 8);
+ size = array->elem_size;
for_each_possible_cpu(cpu) {
bpf_long_memcpy(info->percpu_value_buf + off,
per_cpu_ptr(pptr, cpu),
@@ -633,11 +636,12 @@ static int bpf_iter_init_array_map(void *priv_data,
{
struct bpf_iter_seq_array_map_info *seq_info = priv_data;
struct bpf_map *map = aux->map;
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
void *value_buf;
u32 buf_size;
if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
- buf_size = round_up(map->value_size, 8) * num_possible_cpus();
+ buf_size = array->elem_size * num_possible_cpus();
value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
if (!value_buf)
return -ENOMEM;
@@ -690,7 +694,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
else
- val = array->value + array->elem_size * i;
+ val = array_map_elem_ptr(array, i);
num_elems++;
key = i;
ret = callback_fn((u64)(long)map, (u64)(long)&key,
@@ -1322,7 +1326,7 @@ static int array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
- u32 elem_size = round_up(map->value_size, 8);
+ u32 elem_size = array->elem_size;
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index d469b7f3deef..fa71d58b7ded 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -63,10 +63,11 @@ BTF_ID(func, bpf_lsm_socket_post_create)
BTF_ID(func, bpf_lsm_socket_socketpair)
BTF_SET_END(bpf_lsm_unlocked_sockopt_hooks)
+#ifdef CONFIG_CGROUP_BPF
void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
bpf_func_t *bpf_func)
{
- const struct btf_param *args;
+ const struct btf_param *args __maybe_unused;
if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
btf_id_set_contains(&bpf_lsm_current_hooks,
@@ -75,9 +76,9 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
return;
}
+#ifdef CONFIG_NET
args = btf_params(prog->aux->attach_func_proto);
-#ifdef CONFIG_NET
if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
*bpf_func = __cgroup_bpf_run_lsm_socket;
else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
@@ -86,6 +87,7 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
#endif
*bpf_func = __cgroup_bpf_run_lsm_current;
}
+#endif
int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
const struct bpf_prog *prog)
@@ -219,6 +221,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_get_retval:
return prog->expected_attach_type == BPF_LSM_CGROUP ?
&bpf_get_retval_proto : NULL;
+#ifdef CONFIG_NET
case BPF_FUNC_setsockopt:
if (prog->expected_attach_type != BPF_LSM_CGROUP)
return NULL;
@@ -239,6 +242,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
prog->aux->attach_btf_id))
return &bpf_unlocked_sk_getsockopt_proto;
return NULL;
+#endif
default:
return tracing_prog_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 7e0068c3399c..84b2d9dba79a 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -341,6 +341,9 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
tlinks[BPF_TRAMP_FENTRY].links[0] = link;
tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
+ /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+ * and it must be used alone.
+ */
flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
return arch_prepare_bpf_trampoline(NULL, image, image_end,
model, flags, tlinks, NULL);
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 4423045b8ff3..7ac971ea98d1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -213,7 +213,7 @@ enum {
};
struct btf_kfunc_set_tab {
- struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
+ struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX];
};
struct btf_id_dtor_kfunc_tab {
@@ -1116,7 +1116,8 @@ __printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
*/
#define btf_show_type_value(show, fmt, value) \
do { \
- if ((value) != 0 || (show->flags & BTF_SHOW_ZERO) || \
+ if ((value) != (__typeof__(value))0 || \
+ (show->flags & BTF_SHOW_ZERO) || \
show->state.depth == 0) { \
btf_show(show, "%s%s" fmt "%s%s", \
btf_show_indent(show), \
@@ -1615,7 +1616,7 @@ static void btf_free_id(struct btf *btf)
static void btf_free_kfunc_set_tab(struct btf *btf)
{
struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
- int hook, type;
+ int hook;
if (!tab)
return;
@@ -1624,10 +1625,8 @@ static void btf_free_kfunc_set_tab(struct btf *btf)
*/
if (btf_is_module(btf))
goto free_tab;
- for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
- for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
- kfree(tab->sets[hook][type]);
- }
+ for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++)
+ kfree(tab->sets[hook]);
free_tab:
kfree(tab);
btf->kfunc_set_tab = NULL;
@@ -6171,13 +6170,14 @@ static bool is_kfunc_arg_mem_size(const struct btf *btf,
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
struct bpf_reg_state *regs,
- bool ptr_to_mem_ok)
+ bool ptr_to_mem_ok,
+ u32 kfunc_flags)
{
enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+ bool rel = false, kptr_get = false, trusted_arg = false;
struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
- bool rel = false, kptr_get = false;
const char *func_name, *ref_tname;
const struct btf_type *t, *ref_t;
const struct btf_param *args;
@@ -6209,10 +6209,9 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
if (is_kfunc) {
/* Only kfunc can be release func */
- rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_RELEASE, func_id);
- kptr_get = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_KPTR_ACQUIRE, func_id);
+ rel = kfunc_flags & KF_RELEASE;
+ kptr_get = kfunc_flags & KF_KPTR_GET;
+ trusted_arg = kfunc_flags & KF_TRUSTED_ARGS;
}
/* check that BTF function arguments match actual types that the
@@ -6237,10 +6236,19 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
return -EINVAL;
}
+ /* Check if argument must be a referenced pointer, args + i has
+ * been verified to be a pointer (after skipping modifiers).
+ */
+ if (is_kfunc && trusted_arg && !reg->ref_obj_id) {
+ bpf_log(log, "R%d must be referenced\n", regno);
+ return -EINVAL;
+ }
+
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
- if (rel && reg->ref_obj_id)
+ /* Trusted args have the same offset checks as release arguments */
+ if (trusted_arg || (rel && reg->ref_obj_id))
arg_type |= OBJ_RELEASE;
ret = check_func_arg_reg_off(env, reg, regno, arg_type);
if (ret < 0)
@@ -6338,7 +6346,8 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
reg_ref_tname = btf_name_by_offset(reg_btf,
reg_ref_t->name_off);
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
- reg->off, btf, ref_id, rel && reg->ref_obj_id)) {
+ reg->off, btf, ref_id,
+ trusted_arg || (rel && reg->ref_obj_id))) {
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
func_name, i,
btf_type_str(ref_t), ref_tname,
@@ -6441,7 +6450,7 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
return -EINVAL;
is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
- err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global);
+ err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global, 0);
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function.
@@ -6454,9 +6463,10 @@ int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
const struct btf *btf, u32 func_id,
- struct bpf_reg_state *regs)
+ struct bpf_reg_state *regs,
+ u32 kfunc_flags)
{
- return btf_check_func_arg_match(env, btf, func_id, regs, true);
+ return btf_check_func_arg_match(env, btf, func_id, regs, true, kfunc_flags);
}
/* Convert BTF of a function into bpf_reg_state if possible
@@ -6853,6 +6863,11 @@ bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}
+static void *btf_id_set8_contains(const struct btf_id_set8 *set, u32 id)
+{
+ return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func);
+}
+
enum {
BTF_MODULE_F_LIVE = (1 << 0),
};
@@ -7101,16 +7116,16 @@ BTF_TRACING_TYPE_xxx
/* Kernel Function (kfunc) BTF ID set registration API */
-static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
- enum btf_kfunc_type type,
- struct btf_id_set *add_set, bool vmlinux_set)
+static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
+ struct btf_id_set8 *add_set)
{
+ bool vmlinux_set = !btf_is_module(btf);
struct btf_kfunc_set_tab *tab;
- struct btf_id_set *set;
+ struct btf_id_set8 *set;
u32 set_cnt;
int ret;
- if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
+ if (hook >= BTF_KFUNC_HOOK_MAX) {
ret = -EINVAL;
goto end;
}
@@ -7126,7 +7141,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
btf->kfunc_set_tab = tab;
}
- set = tab->sets[hook][type];
+ set = tab->sets[hook];
/* Warn when register_btf_kfunc_id_set is called twice for the same hook
* for module sets.
*/
@@ -7140,7 +7155,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
* pointer and return.
*/
if (!vmlinux_set) {
- tab->sets[hook][type] = add_set;
+ tab->sets[hook] = add_set;
return 0;
}
@@ -7149,7 +7164,7 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
* and concatenate all individual sets being registered. While each set
* is individually sorted, they may become unsorted when concatenated,
* hence re-sorting the final set again is required to make binary
- * searching the set using btf_id_set_contains function work.
+ * searching the set using btf_id_set8_contains function work.
*/
set_cnt = set ? set->cnt : 0;
@@ -7164,8 +7179,8 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
}
/* Grow set */
- set = krealloc(tab->sets[hook][type],
- offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
+ set = krealloc(tab->sets[hook],
+ offsetof(struct btf_id_set8, pairs[set_cnt + add_set->cnt]),
GFP_KERNEL | __GFP_NOWARN);
if (!set) {
ret = -ENOMEM;
@@ -7173,15 +7188,15 @@ static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
}
/* For newly allocated set, initialize set->cnt to 0 */
- if (!tab->sets[hook][type])
+ if (!tab->sets[hook])
set->cnt = 0;
- tab->sets[hook][type] = set;
+ tab->sets[hook] = set;
/* Concatenate the two sets */
- memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
+ memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0]));
set->cnt += add_set->cnt;
- sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
+ sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL);
return 0;
end:
@@ -7189,38 +7204,25 @@ end:
return ret;
}
-static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
- const struct btf_kfunc_id_set *kset)
-{
- bool vmlinux_set = !btf_is_module(btf);
- int type, ret = 0;
-
- for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
- if (!kset->sets[type])
- continue;
-
- ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
- if (ret)
- break;
- }
- return ret;
-}
-
-static bool __btf_kfunc_id_set_contains(const struct btf *btf,
+static u32 *__btf_kfunc_id_set_contains(const struct btf *btf,
enum btf_kfunc_hook hook,
- enum btf_kfunc_type type,
u32 kfunc_btf_id)
{
- struct btf_id_set *set;
+ struct btf_id_set8 *set;
+ u32 *id;
- if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
- return false;
+ if (hook >= BTF_KFUNC_HOOK_MAX)
+ return NULL;
if (!btf->kfunc_set_tab)
- return false;
- set = btf->kfunc_set_tab->sets[hook][type];
+ return NULL;
+ set = btf->kfunc_set_tab->sets[hook];
if (!set)
- return false;
- return btf_id_set_contains(set, kfunc_btf_id);
+ return NULL;
+ id = btf_id_set8_contains(set, kfunc_btf_id);
+ if (!id)
+ return NULL;
+ /* The flags for BTF ID are located next to it */
+ return id + 1;
}
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
@@ -7248,14 +7250,14 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
* keeping the reference for the duration of the call provides the necessary
* protection for looking up a well-formed btf->kfunc_set_tab.
*/
-bool btf_kfunc_id_set_contains(const struct btf *btf,
+u32 *btf_kfunc_id_set_contains(const struct btf *btf,
enum bpf_prog_type prog_type,
- enum btf_kfunc_type type, u32 kfunc_btf_id)
+ u32 kfunc_btf_id)
{
enum btf_kfunc_hook hook;
hook = bpf_prog_type_to_kfunc_hook(prog_type);
- return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
+ return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
}
/* This function must be invoked only from initcalls/module init functions */
@@ -7282,7 +7284,7 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
return PTR_ERR(btf);
hook = bpf_prog_type_to_kfunc_hook(prog_type);
- ret = btf_populate_kfunc_set(btf, hook, kset);
+ ret = btf_populate_kfunc_set(btf, hook, kset->set);
btf_put(btf);
return ret;
}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bfeb9b937315..c1e10d088dbb 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -652,12 +652,6 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
return fp->jited && !bpf_prog_was_classic(fp);
}
-static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
-{
- return list_empty(&fp->aux->ksym.lnode) ||
- fp->aux->ksym.lnode.prev == LIST_POISON2;
-}
-
void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
if (!bpf_prog_kallsyms_candidate(fp) ||
@@ -833,15 +827,6 @@ struct bpf_prog_pack {
#define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)
-static size_t bpf_prog_pack_size = -1;
-static size_t bpf_prog_pack_mask = -1;
-
-static int bpf_prog_chunk_count(void)
-{
- WARN_ON_ONCE(bpf_prog_pack_size == -1);
- return bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE;
-}
-
static DEFINE_MUTEX(pack_mutex);
static LIST_HEAD(pack_list);
@@ -849,55 +834,33 @@ static LIST_HEAD(pack_list);
* CONFIG_MMU=n. Use PAGE_SIZE in these cases.
*/
#ifdef PMD_SIZE
-#define BPF_HPAGE_SIZE PMD_SIZE
-#define BPF_HPAGE_MASK PMD_MASK
+#define BPF_PROG_PACK_SIZE (PMD_SIZE * num_possible_nodes())
#else
-#define BPF_HPAGE_SIZE PAGE_SIZE
-#define BPF_HPAGE_MASK PAGE_MASK
+#define BPF_PROG_PACK_SIZE PAGE_SIZE
#endif
-static size_t select_bpf_prog_pack_size(void)
-{
- size_t size;
- void *ptr;
-
- size = BPF_HPAGE_SIZE * num_online_nodes();
- ptr = module_alloc(size);
-
- /* Test whether we can get huge pages. If not just use PAGE_SIZE
- * packs.
- */
- if (!ptr || !is_vm_area_hugepages(ptr)) {
- size = PAGE_SIZE;
- bpf_prog_pack_mask = PAGE_MASK;
- } else {
- bpf_prog_pack_mask = BPF_HPAGE_MASK;
- }
-
- vfree(ptr);
- return size;
-}
+#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)
static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_prog_pack *pack;
- pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(bpf_prog_chunk_count())),
+ pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)),
GFP_KERNEL);
if (!pack)
return NULL;
- pack->ptr = module_alloc(bpf_prog_pack_size);
+ pack->ptr = module_alloc(BPF_PROG_PACK_SIZE);
if (!pack->ptr) {
kfree(pack);
return NULL;
}
- bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size);
- bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
+ bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
+ bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);
list_add_tail(&pack->list, &pack_list);
set_vm_flush_reset_perms(pack->ptr);
- set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
- set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+ set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+ set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
return pack;
}
@@ -909,10 +872,7 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insn
void *ptr = NULL;
mutex_lock(&pack_mutex);
- if (bpf_prog_pack_size == -1)
- bpf_prog_pack_size = select_bpf_prog_pack_size();
-
- if (size > bpf_prog_pack_size) {
+ if (size > BPF_PROG_PACK_SIZE) {
size = round_up(size, PAGE_SIZE);
ptr = module_alloc(size);
if (ptr) {
@@ -924,9 +884,9 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insn
goto out;
}
list_for_each_entry(pack, &pack_list, list) {
- pos = bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
+ pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
nbits, 0);
- if (pos < bpf_prog_chunk_count())
+ if (pos < BPF_PROG_CHUNK_COUNT)
goto found_free_area;
}
@@ -950,18 +910,15 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
struct bpf_prog_pack *pack = NULL, *tmp;
unsigned int nbits;
unsigned long pos;
- void *pack_ptr;
mutex_lock(&pack_mutex);
- if (hdr->size > bpf_prog_pack_size) {
+ if (hdr->size > BPF_PROG_PACK_SIZE) {
module_memfree(hdr);
goto out;
}
- pack_ptr = (void *)((unsigned long)hdr & bpf_prog_pack_mask);
-
list_for_each_entry(tmp, &pack_list, list) {
- if (tmp->ptr == pack_ptr) {
+ if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) {
pack = tmp;
break;
}
@@ -971,14 +928,14 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
goto out;
nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
- pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
+ pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
"bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
bitmap_clear(pack->bitmap, pos, nbits);
- if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
- bpf_prog_chunk_count(), 0) == 0) {
+ if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
+ BPF_PROG_CHUNK_COUNT, 0) == 0) {
list_del(&pack->list);
module_memfree(pack->ptr);
kfree(pack);
@@ -1155,7 +1112,6 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog,
bpf_prog_pack_free(ro_header);
return PTR_ERR(ptr);
}
- prog->aux->use_bpf_prog_pack = true;
return 0;
}
@@ -1179,17 +1135,23 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
bpf_jit_uncharge_modmem(size);
}
+struct bpf_binary_header *
+bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
+{
+ unsigned long real_start = (unsigned long)fp->bpf_func;
+ unsigned long addr;
+
+ addr = real_start & BPF_PROG_CHUNK_MASK;
+ return (void *)addr;
+}
+
static inline struct bpf_binary_header *
bpf_jit_binary_hdr(const struct bpf_prog *fp)
{
unsigned long real_start = (unsigned long)fp->bpf_func;
unsigned long addr;
- if (fp->aux->use_bpf_prog_pack)
- addr = real_start & BPF_PROG_CHUNK_MASK;
- else
- addr = real_start & PAGE_MASK;
-
+ addr = real_start & PAGE_MASK;
return (void *)addr;
}
@@ -1202,11 +1164,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
if (fp->jited) {
struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
- if (fp->aux->use_bpf_prog_pack)
- bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */);
- else
- bpf_jit_binary_free(hdr);
-
+ bpf_jit_binary_free(hdr);
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index c2867068e5bd..1400561efb15 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -845,7 +845,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab_netdev *dev;
dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev),
- GFP_ATOMIC | __GFP_NOWARN,
+ GFP_NOWAIT | __GFP_NOWARN,
dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 17fb69c0e0dc..da7578426a46 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -61,7 +61,7 @@
*
* As regular device interrupt handlers and soft interrupts are forced into
* thread context, the existing code which does
- * spin_lock*(); alloc(GPF_ATOMIC); spin_unlock*();
+ * spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
* just works.
*
* In theory the BPF locks could be converted to regular spinlocks as well,
@@ -978,7 +978,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
goto dec_count;
}
l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
- GFP_ATOMIC | __GFP_NOWARN,
+ GFP_NOWAIT | __GFP_NOWARN,
htab->map.numa_node);
if (!l_new) {
l_new = ERR_PTR(-ENOMEM);
@@ -996,7 +996,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
} else {
/* alloc_percpu zero-fills */
pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
- GFP_ATOMIC | __GFP_NOWARN);
+ GFP_NOWAIT | __GFP_NOWARN);
if (!pptr) {
kfree(l_new);
l_new = ERR_PTR(-ENOMEM);
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 8654fc97f5fe..49ef0ce040c7 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -165,7 +165,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key,
}
new = bpf_map_kmalloc_node(map, struct_size(new, data, map->value_size),
- __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
+ __GFP_ZERO | GFP_NOWAIT | __GFP_NOWARN,
map->numa_node);
if (!new)
return -ENOMEM;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index f0d05a3cc4b9..d789e3b831ad 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -285,7 +285,7 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
if (value)
size += trie->map.value_size;
- node = bpf_map_kmalloc_node(&trie->map, size, GFP_ATOMIC | __GFP_NOWARN,
+ node = bpf_map_kmalloc_node(&trie->map, size, GFP_NOWAIT | __GFP_NOWARN,
trie->map.numa_node);
if (!node)
return NULL;
diff --git a/kernel/bpf/preload/iterators/Makefile b/kernel/bpf/preload/iterators/Makefile
index bfe24f8c5a20..6762b1260f2f 100644
--- a/kernel/bpf/preload/iterators/Makefile
+++ b/kernel/bpf/preload/iterators/Makefile
@@ -9,7 +9,7 @@ LLVM_STRIP ?= llvm-strip
TOOLS_PATH := $(abspath ../../../../tools)
BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool
BPFTOOL_OUTPUT := $(abs_out)/bpftool
-DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool
+DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf
@@ -61,9 +61,5 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU
OUTPUT=$(abspath $(dir $@))/ prefix= \
DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers
-$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \
- OUTPUT=$(BPFTOOL_OUTPUT)/ \
- LIBBPF_OUTPUT=$(LIBBPF_OUTPUT)/ \
- LIBBPF_DESTDIR=$(LIBBPF_DESTDIR)/ \
- prefix= DESTDIR=$(abs_out)/ install-bin
+$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ab688d85b2c6..83c7136c5788 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -419,35 +419,53 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
#ifdef CONFIG_MEMCG_KMEM
static void bpf_map_save_memcg(struct bpf_map *map)
{
- map->memcg = get_mem_cgroup_from_mm(current->mm);
+ /* Currently if a map is created by a process belonging to the root
+ * memory cgroup, get_obj_cgroup_from_current() will return NULL.
+ * So we have to check map->objcg for being NULL each time it's
+ * being used.
+ */
+ map->objcg = get_obj_cgroup_from_current();
}
static void bpf_map_release_memcg(struct bpf_map *map)
{
- mem_cgroup_put(map->memcg);
+ if (map->objcg)
+ obj_cgroup_put(map->objcg);
+}
+
+static struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map)
+{
+ if (map->objcg)
+ return get_mem_cgroup_from_objcg(map->objcg);
+
+ return root_mem_cgroup;
}
void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
int node)
{
- struct mem_cgroup *old_memcg;
+ struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- old_memcg = set_active_memcg(map->memcg);
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
return ptr;
}
void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
{
- struct mem_cgroup *old_memcg;
+ struct mem_cgroup *memcg, *old_memcg;
void *ptr;
- old_memcg = set_active_memcg(map->memcg);
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
ptr = kzalloc(size, flags | __GFP_ACCOUNT);
set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
return ptr;
}
@@ -455,12 +473,14 @@ void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
size_t align, gfp_t flags)
{
- struct mem_cgroup *old_memcg;
+ struct mem_cgroup *memcg, *old_memcg;
void __percpu *ptr;
- old_memcg = set_active_memcg(map->memcg);
+ memcg = bpf_map_get_memcg(map);
+ old_memcg = set_active_memcg(memcg);
ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT);
set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
return ptr;
}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 6cd226584c33..42e387a12694 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -13,6 +13,7 @@
#include <linux/static_call.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_lsm.h>
+#include <linux/delay.h>
/* dummy _ops. The verifier will operate on target program's ops. */
const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -29,6 +30,81 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex);
+
+static int bpf_tramp_ftrace_ops_func(struct ftrace_ops *ops, enum ftrace_ops_cmd cmd)
+{
+ struct bpf_trampoline *tr = ops->private;
+ int ret = 0;
+
+ if (cmd == FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF) {
+ /* This is called inside register_ftrace_direct_multi(), so
+ * tr->mutex is already locked.
+ */
+ lockdep_assert_held_once(&tr->mutex);
+
+ /* Instead of updating the trampoline here, we propagate
+ * -EAGAIN to register_ftrace_direct_multi(). Then we can
+ * retry register_ftrace_direct_multi() after updating the
+ * trampoline.
+ */
+ if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
+ !(tr->flags & BPF_TRAMP_F_ORIG_STACK)) {
+ if (WARN_ON_ONCE(tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY))
+ return -EBUSY;
+
+ tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
+ return -EAGAIN;
+ }
+
+ return 0;
+ }
+
+ /* The normal locking order is
+ * tr->mutex => direct_mutex (ftrace.c) => ftrace_lock (ftrace.c)
+ *
+ * The following two commands are called from
+ *
+ * prepare_direct_functions_for_ipmodify
+ * cleanup_direct_functions_after_ipmodify
+ *
+ * In both cases, direct_mutex is already locked. Use
+ * mutex_trylock(&tr->mutex) to avoid deadlock in race condition
+ * (something else is making changes to this same trampoline).
+ */
+ if (!mutex_trylock(&tr->mutex)) {
+ /* sleep 1 ms to make sure whatever holding tr->mutex makes
+ * some progress.
+ */
+ msleep(1);
+ return -EAGAIN;
+ }
+
+ switch (cmd) {
+ case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
+ tr->flags |= BPF_TRAMP_F_SHARE_IPMODIFY;
+
+ if ((tr->flags & BPF_TRAMP_F_CALL_ORIG) &&
+ !(tr->flags & BPF_TRAMP_F_ORIG_STACK))
+ ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+ break;
+ case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
+ tr->flags &= ~BPF_TRAMP_F_SHARE_IPMODIFY;
+
+ if (tr->flags & BPF_TRAMP_F_ORIG_STACK)
+ ret = bpf_trampoline_update(tr, false /* lock_direct_mutex */);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ };
+
+ mutex_unlock(&tr->mutex);
+ return ret;
+}
+#endif
+
bool bpf_prog_has_trampoline(const struct bpf_prog *prog)
{
enum bpf_attach_type eatype = prog->expected_attach_type;
@@ -89,6 +165,16 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
tr = kzalloc(sizeof(*tr), GFP_KERNEL);
if (!tr)
goto out;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ tr->fops = kzalloc(sizeof(struct ftrace_ops), GFP_KERNEL);
+ if (!tr->fops) {
+ kfree(tr);
+ tr = NULL;
+ goto out;
+ }
+ tr->fops->private = tr;
+ tr->fops->ops_func = bpf_tramp_ftrace_ops_func;
+#endif
tr->key = key;
INIT_HLIST_NODE(&tr->hlist);
@@ -128,7 +214,7 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
int ret;
if (tr->func.ftrace_managed)
- ret = unregister_ftrace_direct((long)ip, (long)old_addr);
+ ret = unregister_ftrace_direct_multi(tr->fops, (long)old_addr);
else
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
@@ -137,15 +223,20 @@ static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
return ret;
}
-static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
+static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr,
+ bool lock_direct_mutex)
{
void *ip = tr->func.addr;
int ret;
- if (tr->func.ftrace_managed)
- ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
- else
+ if (tr->func.ftrace_managed) {
+ if (lock_direct_mutex)
+ ret = modify_ftrace_direct_multi(tr->fops, (long)new_addr);
+ else
+ ret = modify_ftrace_direct_multi_nolock(tr->fops, (long)new_addr);
+ } else {
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
+ }
return ret;
}
@@ -163,10 +254,12 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
if (bpf_trampoline_module_get(tr))
return -ENOENT;
- if (tr->func.ftrace_managed)
- ret = register_ftrace_direct((long)ip, (long)new_addr);
- else
+ if (tr->func.ftrace_managed) {
+ ftrace_set_filter_ip(tr->fops, (unsigned long)ip, 0, 0);
+ ret = register_ftrace_direct_multi(tr->fops, (long)new_addr);
+ } else {
ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
+ }
if (ret)
bpf_trampoline_module_put(tr);
@@ -332,11 +425,11 @@ out:
return ERR_PTR(err);
}
-static int bpf_trampoline_update(struct bpf_trampoline *tr)
+static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mutex)
{
struct bpf_tramp_image *im;
struct bpf_tramp_links *tlinks;
- u32 flags = BPF_TRAMP_F_RESTORE_REGS;
+ u32 orig_flags = tr->flags;
bool ip_arg = false;
int err, total;
@@ -358,15 +451,31 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
goto out;
}
+ /* clear all bits except SHARE_IPMODIFY */
+ tr->flags &= BPF_TRAMP_F_SHARE_IPMODIFY;
+
if (tlinks[BPF_TRAMP_FEXIT].nr_links ||
- tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links)
- flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+ tlinks[BPF_TRAMP_MODIFY_RETURN].nr_links) {
+ /* NOTE: BPF_TRAMP_F_RESTORE_REGS and BPF_TRAMP_F_SKIP_FRAME
+ * should not be set together.
+ */
+ tr->flags |= BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
+ } else {
+ tr->flags |= BPF_TRAMP_F_RESTORE_REGS;
+ }
if (ip_arg)
- flags |= BPF_TRAMP_F_IP_ARG;
+ tr->flags |= BPF_TRAMP_F_IP_ARG;
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+again:
+ if ((tr->flags & BPF_TRAMP_F_SHARE_IPMODIFY) &&
+ (tr->flags & BPF_TRAMP_F_CALL_ORIG))
+ tr->flags |= BPF_TRAMP_F_ORIG_STACK;
+#endif
err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
- &tr->func.model, flags, tlinks,
+ &tr->func.model, tr->flags, tlinks,
tr->func.addr);
if (err < 0)
goto out;
@@ -375,17 +484,34 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
WARN_ON(!tr->cur_image && tr->selector);
if (tr->cur_image)
/* progs already running at this address */
- err = modify_fentry(tr, tr->cur_image->image, im->image);
+ err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex);
else
/* first time registering */
err = register_fentry(tr, im->image);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ if (err == -EAGAIN) {
+ /* -EAGAIN from bpf_tramp_ftrace_ops_func. Now
+ * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the
+ * trampoline again, and retry register.
+ */
+ /* reset fops->func and fops->trampoline for re-register */
+ tr->fops->func = NULL;
+ tr->fops->trampoline = 0;
+ goto again;
+ }
+#endif
if (err)
goto out;
+
if (tr->cur_image)
bpf_tramp_image_put(tr->cur_image);
tr->cur_image = im;
tr->selector++;
out:
+ /* If any error happens, restore previous flags */
+ if (err)
+ tr->flags = orig_flags;
kfree(tlinks);
return err;
}
@@ -451,7 +577,7 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_tr
hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]);
tr->progs_cnt[kind]++;
- err = bpf_trampoline_update(tr);
+ err = bpf_trampoline_update(tr, true /* lock_direct_mutex */);
if (err) {
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
@@ -484,7 +610,7 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_
}
hlist_del_init(&link->tramp_hlist);
tr->progs_cnt[kind]--;
- return bpf_trampoline_update(tr);
+ return bpf_trampoline_update(tr, true /* lock_direct_mutex */);
}
/* bpf_trampoline_unlink_prog() should never fail. */
@@ -498,7 +624,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin
return err;
}
-#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
+#if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
static void bpf_shim_tramp_link_release(struct bpf_link *link)
{
struct bpf_shim_tramp_link *shim_link =
@@ -712,6 +838,7 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
* multiple rcu callbacks.
*/
hlist_del(&tr->hlist);
+ kfree(tr->fops);
kfree(tr);
out:
mutex_unlock(&trampoline_mutex);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 328cfab3af60..096fdac70165 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5533,17 +5533,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
-static bool arg_type_is_alloc_size(enum bpf_arg_type type)
-{
- return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
-}
-
-static bool arg_type_is_int_ptr(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_INT ||
- type == ARG_PTR_TO_LONG;
-}
-
static bool arg_type_is_release(enum bpf_arg_type type)
{
return type & OBJ_RELEASE;
@@ -5929,7 +5918,8 @@ skip_type_check:
meta->ref_obj_id = reg->ref_obj_id;
}
- if (arg_type == ARG_CONST_MAP_PTR) {
+ switch (base_type(arg_type)) {
+ case ARG_CONST_MAP_PTR:
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
if (meta->map_ptr) {
/* Use map_uid (which is unique id of inner map) to reject:
@@ -5954,7 +5944,8 @@ skip_type_check:
}
meta->map_ptr = reg->map_ptr;
meta->map_uid = reg->map_uid;
- } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
+ break;
+ case ARG_PTR_TO_MAP_KEY:
/* bpf_map_xxx(..., map_ptr, ..., key) call:
* check that [key, key + map->key_size) are within
* stack limits and initialized
@@ -5971,7 +5962,8 @@ skip_type_check:
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
- } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
+ break;
+ case ARG_PTR_TO_MAP_VALUE:
if (type_may_be_null(arg_type) && register_is_null(reg))
return 0;
@@ -5987,14 +5979,16 @@ skip_type_check:
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
- } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+ break;
+ case ARG_PTR_TO_PERCPU_BTF_ID:
if (!reg->btf_id) {
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
return -EACCES;
}
meta->ret_btf = reg->btf;
meta->ret_btf_id = reg->btf_id;
- } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+ break;
+ case ARG_PTR_TO_SPIN_LOCK:
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
return -EACCES;
@@ -6005,12 +5999,15 @@ skip_type_check:
verbose(env, "verifier internal error\n");
return -EFAULT;
}
- } else if (arg_type == ARG_PTR_TO_TIMER) {
+ break;
+ case ARG_PTR_TO_TIMER:
if (process_timer_func(env, regno, meta))
return -EACCES;
- } else if (arg_type == ARG_PTR_TO_FUNC) {
+ break;
+ case ARG_PTR_TO_FUNC:
meta->subprogno = reg->subprogno;
- } else if (base_type(arg_type) == ARG_PTR_TO_MEM) {
+ break;
+ case ARG_PTR_TO_MEM:
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
*/
@@ -6020,11 +6017,14 @@ skip_type_check:
fn->arg_size[arg], false,
meta);
}
- } else if (arg_type_is_mem_size(arg_type)) {
- bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
-
- err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
- } else if (arg_type_is_dynptr(arg_type)) {
+ break;
+ case ARG_CONST_SIZE:
+ err = check_mem_size_reg(env, reg, regno, false, meta);
+ break;
+ case ARG_CONST_SIZE_OR_ZERO:
+ err = check_mem_size_reg(env, reg, regno, true, meta);
+ break;
+ case ARG_PTR_TO_DYNPTR:
if (arg_type & MEM_UNINIT) {
if (!is_dynptr_reg_valid_uninit(env, reg)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
@@ -6058,21 +6058,28 @@ skip_type_check:
err_extra, arg + 1);
return -EINVAL;
}
- } else if (arg_type_is_alloc_size(arg_type)) {
+ break;
+ case ARG_CONST_ALLOC_SIZE_OR_ZERO:
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
regno);
return -EACCES;
}
meta->mem_size = reg->var_off.value;
- } else if (arg_type_is_int_ptr(arg_type)) {
+ break;
+ case ARG_PTR_TO_INT:
+ case ARG_PTR_TO_LONG:
+ {
int size = int_ptr_type_to_size(arg_type);
err = check_helper_mem_access(env, regno, size, false, meta);
if (err)
return err;
err = check_ptr_alignment(env, reg, 0, size, true);
- } else if (arg_type == ARG_PTR_TO_CONST_STR) {
+ break;
+ }
+ case ARG_PTR_TO_CONST_STR:
+ {
struct bpf_map *map = reg->map_ptr;
int map_off;
u64 map_addr;
@@ -6111,9 +6118,12 @@ skip_type_check:
verbose(env, "string is not zero-terminated\n");
return -EINVAL;
}
- } else if (arg_type == ARG_PTR_TO_KPTR) {
+ break;
+ }
+ case ARG_PTR_TO_KPTR:
if (process_kptr_func(env, regno, meta))
return -EACCES;
+ break;
}
return err;
@@ -7160,6 +7170,7 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
const struct bpf_func_proto *fn = NULL;
enum bpf_return_type ret_type;
enum bpf_type_flag ret_flag;
@@ -7321,7 +7332,8 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
break;
case BPF_FUNC_set_retval:
- if (env->prog->expected_attach_type == BPF_LSM_CGROUP) {
+ if (prog_type == BPF_PROG_TYPE_LSM &&
+ env->prog->expected_attach_type == BPF_LSM_CGROUP) {
if (!env->prog->aux->attach_func_proto->type) {
/* Make sure programs that attach to void
* hooks don't try to modify return value.
@@ -7550,6 +7562,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
struct btf *desc_btf;
+ u32 *kfunc_flags;
bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
@@ -7565,18 +7578,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
func_name = btf_name_by_offset(desc_btf, func->name_off);
func_proto = btf_type_by_id(desc_btf, func->type);
- if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_CHECK, func_id)) {
+ kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
+ if (!kfunc_flags) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
-
- acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_ACQUIRE, func_id);
+ acq = *kfunc_flags & KF_ACQUIRE;
/* Check the arguments */
- err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
+ err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags);
if (err < 0)
return err;
/* In case of release function, we get register number of refcounted
@@ -7620,8 +7631,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
- if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+ if (*kfunc_flags & KF_RET_NULL) {
regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
regs[BPF_REG_0].id = ++env->id_gen;
@@ -12562,6 +12572,7 @@ static bool is_tracing_prog_type(enum bpf_prog_type type)
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
return true;
default:
return false;
@@ -13620,6 +13631,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
/* Below members will be freed only at prog->aux */
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
+ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
@@ -13632,9 +13644,6 @@ static int jit_subprogs(struct bpf_verifier_env *env)
poke->aux = func[i]->aux;
}
- /* Use bpf_prog_F_tag to indicate functions in stack traces.
- * Long term would need debug info to populate names
- */
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index fbdf8d3279ac..79a85834ce9d 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bsearch.h>
+#include <linux/btf_ids.h>
/*
* These will be re-linked against their real values
@@ -799,6 +800,96 @@ static const struct seq_operations kallsyms_op = {
.show = s_show
};
+#ifdef CONFIG_BPF_SYSCALL
+
+struct bpf_iter__ksym {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct kallsym_iter *, ksym);
+};
+
+static int ksym_prog_seq_show(struct seq_file *m, bool in_stop)
+{
+ struct bpf_iter__ksym ctx;
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = m;
+ prog = bpf_iter_get_info(&meta, in_stop);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.ksym = m ? m->private : NULL;
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int bpf_iter_ksym_seq_show(struct seq_file *m, void *p)
+{
+ return ksym_prog_seq_show(m, false);
+}
+
+static void bpf_iter_ksym_seq_stop(struct seq_file *m, void *p)
+{
+ if (!p)
+ (void) ksym_prog_seq_show(m, true);
+ else
+ s_stop(m, p);
+}
+
+static const struct seq_operations bpf_iter_ksym_ops = {
+ .start = s_start,
+ .next = s_next,
+ .stop = bpf_iter_ksym_seq_stop,
+ .show = bpf_iter_ksym_seq_show,
+};
+
+static int bpf_iter_ksym_init(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+ struct kallsym_iter *iter = priv_data;
+
+ reset_iter(iter, 0);
+
+ /* cache here as in kallsyms_open() case; use current process
+ * credentials to tell BPF iterators if values should be shown.
+ */
+ iter->show_value = kallsyms_show_value(current_cred());
+
+ return 0;
+}
+
+DEFINE_BPF_ITER_FUNC(ksym, struct bpf_iter_meta *meta, struct kallsym_iter *ksym)
+
+static const struct bpf_iter_seq_info ksym_iter_seq_info = {
+ .seq_ops = &bpf_iter_ksym_ops,
+ .init_seq_private = bpf_iter_ksym_init,
+ .fini_seq_private = NULL,
+ .seq_priv_size = sizeof(struct kallsym_iter),
+};
+
+static struct bpf_iter_reg ksym_iter_reg_info = {
+ .target = "ksym",
+ .feature = BPF_ITER_RESCHED,
+ .ctx_arg_info_size = 1,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__ksym, ksym),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
+ .seq_info = &ksym_iter_seq_info,
+};
+
+BTF_ID_LIST(btf_ksym_iter_id)
+BTF_ID(struct, kallsym_iter)
+
+static int __init bpf_ksym_iter_register(void)
+{
+ ksym_iter_reg_info.ctx_arg_info[0].btf_id = *btf_ksym_iter_id;
+ return bpf_iter_reg_target(&ksym_iter_reg_info);
+}
+
+late_initcall(bpf_ksym_iter_register);
+
+#endif /* CONFIG_BPF_SYSCALL */
+
static inline int kallsyms_for_perf(void)
{
#ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 601ccf1b2f09..bc921a3f7ea8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1861,6 +1861,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
ftrace_hash_rec_update_modify(ops, filter_hash, 1);
}
+static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
+
/*
* Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
* or no-needed to update, -EBUSY if it detects a conflict of the flag
@@ -1869,6 +1871,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
* - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
* - If the hash is EMPTY_HASH, it hits nothing
* - Anything else hits the recs which match the hash entries.
+ *
+ * DIRECT ops does not have IPMODIFY flag, but we still need to check it
+ * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call
+ * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with
+ * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate
+ * the return value to the caller and eventually to the owner of the DIRECT
+ * ops.
*/
static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
struct ftrace_hash *old_hash,
@@ -1877,17 +1886,26 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
struct ftrace_page *pg;
struct dyn_ftrace *rec, *end = NULL;
int in_old, in_new;
+ bool is_ipmodify, is_direct;
/* Only update if the ops has been registered */
if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
return 0;
- if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+ is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
+ is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
+
+ /* neither IPMODIFY nor DIRECT, skip */
+ if (!is_ipmodify && !is_direct)
+ return 0;
+
+ if (WARN_ON_ONCE(is_ipmodify && is_direct))
return 0;
/*
- * Since the IPMODIFY is a very address sensitive action, we do not
- * allow ftrace_ops to set all functions to new hash.
+ * Since the IPMODIFY and DIRECT are very address sensitive
+ * actions, we do not allow ftrace_ops to set all functions to new
+ * hash.
*/
if (!new_hash || !old_hash)
return -EINVAL;
@@ -1905,12 +1923,32 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
continue;
if (in_new) {
- /* New entries must ensure no others are using it */
- if (rec->flags & FTRACE_FL_IPMODIFY)
- goto rollback;
- rec->flags |= FTRACE_FL_IPMODIFY;
- } else /* Removed entry */
+ if (rec->flags & FTRACE_FL_IPMODIFY) {
+ int ret;
+
+ /* Cannot have two ipmodify on same rec */
+ if (is_ipmodify)
+ goto rollback;
+
+ FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);
+
+ /*
+ * Another ops with IPMODIFY is already
+ * attached. We are now attaching a direct
+ * ops. Run SHARE_IPMODIFY_SELF, to check
+ * whether sharing is supported.
+ */
+ if (!ops->ops_func)
+ return -EBUSY;
+ ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
+ if (ret)
+ return ret;
+ } else if (is_ipmodify) {
+ rec->flags |= FTRACE_FL_IPMODIFY;
+ }
+ } else if (is_ipmodify) {
rec->flags &= ~FTRACE_FL_IPMODIFY;
+ }
} while_for_each_ftrace_rec();
return 0;
@@ -2454,8 +2492,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
struct ftrace_ops direct_ops = {
.func = call_direct_funcs,
- .flags = FTRACE_OPS_FL_IPMODIFY
- | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
+ .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
/*
* By declaring the main trampoline as this trampoline
@@ -3072,14 +3109,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
}
/*
- * Check if the current ops references the record.
+ * Check if the current ops references the given ip.
*
* If the ops traces all functions, then it was already accounted for.
* If the ops does not trace the current record function, skip it.
* If the ops ignores the function via notrace filter, skip it.
*/
-static inline bool
-ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+static bool
+ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
{
/* If ops isn't enabled, ignore it */
if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
@@ -3091,16 +3128,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
/* The function must be in the filter */
if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
- !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
+ !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
return false;
/* If in notrace hash, we ignore it too */
- if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
+ if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
return false;
return true;
}
+/*
+ * Check if the current ops references the record.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static bool
+ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+ return ops_references_ip(ops, rec->ip);
+}
+
static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
{
bool init_nop = ftrace_need_init_nop();
@@ -5215,6 +5265,8 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
return direct;
}
+static int register_ftrace_function_nolock(struct ftrace_ops *ops);
+
/**
* register_ftrace_direct - Call a custom trampoline directly
* @ip: The address of the nop at the beginning of a function
@@ -5286,7 +5338,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
- ret = register_ftrace_function(&direct_ops);
+ ret = register_ftrace_function_nolock(&direct_ops);
if (ret)
ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
}
@@ -5545,8 +5597,7 @@ int modify_ftrace_direct(unsigned long ip,
}
EXPORT_SYMBOL_GPL(modify_ftrace_direct);
-#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
- FTRACE_OPS_FL_SAVE_REGS)
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
static int check_direct_multi(struct ftrace_ops *ops)
{
@@ -5639,7 +5690,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
ops->flags = MULTI_FLAGS;
ops->trampoline = FTRACE_REGS_ADDR;
- err = register_ftrace_function(ops);
+ err = register_ftrace_function_nolock(ops);
out_remove:
if (err)
@@ -5691,22 +5742,8 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
}
EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi);
-/**
- * modify_ftrace_direct_multi - Modify an existing direct 'multi' call
- * to call something else
- * @ops: The address of the struct ftrace_ops object
- * @addr: The address of the new trampoline to call at @ops functions
- *
- * This is used to unregister currently registered direct caller and
- * register new one @addr on functions registered in @ops object.
- *
- * Note there's window between ftrace_shutdown and ftrace_startup calls
- * where there will be no callbacks called.
- *
- * Returns: zero on success. Non zero on error, which includes:
- * -EINVAL - The @ops object was not properly registered.
- */
-int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+static int
+__modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
{
struct ftrace_hash *hash;
struct ftrace_func_entry *entry, *iter;
@@ -5717,20 +5754,15 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
int i, size;
int err;
- if (check_direct_multi(ops))
- return -EINVAL;
- if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
- return -EINVAL;
-
- mutex_lock(&direct_mutex);
+ lockdep_assert_held_once(&direct_mutex);
/* Enable the tmp_ops to have the same functions as the direct ops */
ftrace_ops_init(&tmp_ops);
tmp_ops.func_hash = ops->func_hash;
- err = register_ftrace_function(&tmp_ops);
+ err = register_ftrace_function_nolock(&tmp_ops);
if (err)
- goto out_direct;
+ return err;
/*
* Now the ftrace_ops_list_func() is called to do the direct callers.
@@ -5754,7 +5786,64 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
/* Removing the tmp_ops will add the updated direct callers to the functions */
unregister_ftrace_function(&tmp_ops);
- out_direct:
+ return err;
+}
+
+/**
+ * modify_ftrace_direct_multi_nolock - Modify an existing direct 'multi' call
+ * to call something else
+ * @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the new trampoline to call at @ops functions
+ *
+ * This is used to unregister currently registered direct caller and
+ * register new one @addr on functions registered in @ops object.
+ *
+ * Note there's window between ftrace_shutdown and ftrace_startup calls
+ * where there will be no callbacks called.
+ *
+ * Caller should already have direct_mutex locked, so we don't lock
+ * direct_mutex here.
+ *
+ * Returns: zero on success. Non zero on error, which includes:
+ * -EINVAL - The @ops object was not properly registered.
+ */
+int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr)
+{
+ if (check_direct_multi(ops))
+ return -EINVAL;
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return -EINVAL;
+
+ return __modify_ftrace_direct_multi(ops, addr);
+}
+EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock);
+
+/**
+ * modify_ftrace_direct_multi - Modify an existing direct 'multi' call
+ * to call something else
+ * @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the new trampoline to call at @ops functions
+ *
+ * This is used to unregister currently registered direct caller and
+ * register new one @addr on functions registered in @ops object.
+ *
+ * Note there's window between ftrace_shutdown and ftrace_startup calls
+ * where there will be no callbacks called.
+ *
+ * Returns: zero on success. Non zero on error, which includes:
+ * -EINVAL - The @ops object was not properly registered.
+ */
+int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+ int err;
+
+ if (check_direct_multi(ops))
+ return -EINVAL;
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+ return -EINVAL;
+
+ mutex_lock(&direct_mutex);
+ err = __modify_ftrace_direct_multi(ops, addr);
mutex_unlock(&direct_mutex);
return err;
}
@@ -7965,6 +8054,143 @@ int ftrace_is_dead(void)
return ftrace_disabled;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When registering ftrace_ops with IPMODIFY, it is necessary to make sure
+ * it doesn't conflict with any direct ftrace_ops. If there is existing
+ * direct ftrace_ops on a kernel function being patched, call
+ * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing.
+ *
+ * @ops: ftrace_ops being registered.
+ *
+ * Returns:
+ * 0 on success;
+ * Negative on failure.
+ */
+static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
+{
+ struct ftrace_func_entry *entry;
+ struct ftrace_hash *hash;
+ struct ftrace_ops *op;
+ int size, i, ret;
+
+ lockdep_assert_held_once(&direct_mutex);
+
+ if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+ return 0;
+
+ hash = ops->func_hash->filter_hash;
+ size = 1 << hash->size_bits;
+ for (i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+ unsigned long ip = entry->ip;
+ bool found_op = false;
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ if (!(op->flags & FTRACE_OPS_FL_DIRECT))
+ continue;
+ if (ops_references_ip(op, ip)) {
+ found_op = true;
+ break;
+ }
+ } while_for_each_ftrace_op(op);
+ mutex_unlock(&ftrace_lock);
+
+ if (found_op) {
+ if (!op->ops_func)
+ return -EBUSY;
+
+ ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Similar to prepare_direct_functions_for_ipmodify, clean up after ops
+ * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT
+ * ops.
+ */
+static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
+{
+ struct ftrace_func_entry *entry;
+ struct ftrace_hash *hash;
+ struct ftrace_ops *op;
+ int size, i;
+
+ if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
+ return;
+
+ mutex_lock(&direct_mutex);
+
+ hash = ops->func_hash->filter_hash;
+ size = 1 << hash->size_bits;
+ for (i = 0; i < size; i++) {
+ hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+ unsigned long ip = entry->ip;
+ bool found_op = false;
+
+ mutex_lock(&ftrace_lock);
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ if (!(op->flags & FTRACE_OPS_FL_DIRECT))
+ continue;
+ if (ops_references_ip(op, ip)) {
+ found_op = true;
+ break;
+ }
+ } while_for_each_ftrace_op(op);
+ mutex_unlock(&ftrace_lock);
+
+ /* The cleanup is optional, ignore any errors */
+ if (found_op && op->ops_func)
+ op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
+ }
+ }
+ mutex_unlock(&direct_mutex);
+}
+
+#define lock_direct_mutex() mutex_lock(&direct_mutex)
+#define unlock_direct_mutex() mutex_unlock(&direct_mutex)
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
+{
+ return 0;
+}
+
+static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
+{
+}
+
+#define lock_direct_mutex() do { } while (0)
+#define unlock_direct_mutex() do { } while (0)
+
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+
+/*
+ * Similar to register_ftrace_function, except we don't lock direct_mutex.
+ */
+static int register_ftrace_function_nolock(struct ftrace_ops *ops)
+{
+ int ret;
+
+ ftrace_ops_init(ops);
+
+ mutex_lock(&ftrace_lock);
+
+ ret = ftrace_startup(ops, 0);
+
+ mutex_unlock(&ftrace_lock);
+
+ return ret;
+}
+
/**
* register_ftrace_function - register a function for profiling
* @ops: ops structure that holds the function for profiling.
@@ -7980,14 +8206,15 @@ int register_ftrace_function(struct ftrace_ops *ops)
{
int ret;
- ftrace_ops_init(ops);
-
- mutex_lock(&ftrace_lock);
-
- ret = ftrace_startup(ops, 0);
+ lock_direct_mutex();
+ ret = prepare_direct_functions_for_ipmodify(ops);
+ if (ret < 0)
+ goto out_unlock;
- mutex_unlock(&ftrace_lock);
+ ret = register_ftrace_function_nolock(ops);
+out_unlock:
+ unlock_direct_mutex();
return ret;
}
EXPORT_SYMBOL_GPL(register_ftrace_function);
@@ -8006,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
ret = ftrace_shutdown(ops, 0);
mutex_unlock(&ftrace_lock);
+ cleanup_direct_functions_after_ipmodify(ops);
return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_function);