diff options
Diffstat (limited to 'kernel/bpf/cgroup.c')
-rw-r--r-- | kernel/bpf/cgroup.c | 198 |
1 files changed, 127 insertions, 71 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index b567ca46555c..03145d45e3d5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -19,7 +19,7 @@ #include "../cgroup/cgroup-internal.h" -DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_BPF_ATTACH_TYPE); +DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE); EXPORT_SYMBOL(cgroup_bpf_enabled_key); void cgroup_bpf_offline(struct cgroup *cgrp) @@ -113,12 +113,12 @@ static void cgroup_bpf_release(struct work_struct *work) struct list_head *storages = &cgrp->bpf.storages; struct bpf_cgroup_storage *storage, *stmp; - unsigned int type; + unsigned int atype; mutex_lock(&cgroup_mutex); - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { - struct list_head *progs = &cgrp->bpf.progs[type]; + for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) { + struct list_head *progs = &cgrp->bpf.progs[atype]; struct bpf_prog_list *pl, *pltmp; list_for_each_entry_safe(pl, pltmp, progs, node) { @@ -128,10 +128,10 @@ static void cgroup_bpf_release(struct work_struct *work) if (pl->link) bpf_cgroup_link_auto_detach(pl->link); kfree(pl); - static_branch_dec(&cgroup_bpf_enabled_key[type]); + static_branch_dec(&cgroup_bpf_enabled_key[atype]); } old_array = rcu_dereference_protected( - cgrp->bpf.effective[type], + cgrp->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); bpf_prog_array_free(old_array); } @@ -196,7 +196,7 @@ static u32 prog_list_length(struct list_head *head) * if parent has overridable or multi-prog, allow attaching */ static bool hierarchy_allows_attach(struct cgroup *cgrp, - enum bpf_attach_type type) + enum cgroup_bpf_attach_type atype) { struct cgroup *p; @@ -204,12 +204,12 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, if (!p) return true; do { - u32 flags = p->bpf.flags[type]; + u32 flags = p->bpf.flags[atype]; u32 cnt; if (flags & BPF_F_ALLOW_MULTI) return true; - cnt = prog_list_length(&p->bpf.progs[type]); + cnt = prog_list_length(&p->bpf.progs[atype]); WARN_ON_ONCE(cnt > 1); if (cnt == 1) return !!(flags & BPF_F_ALLOW_OVERRIDE); @@ -225,7 +225,7 @@ static bool hierarchy_allows_attach(struct cgroup *cgrp, * to programs in this cgroup */ static int compute_effective_progs(struct cgroup *cgrp, - enum bpf_attach_type type, + enum cgroup_bpf_attach_type atype, struct bpf_prog_array **array) { struct bpf_prog_array_item *item; @@ -236,8 +236,8 @@ static int compute_effective_progs(struct cgroup *cgrp, /* count number of effective programs by walking parents */ do { - if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) - cnt += prog_list_length(&p->bpf.progs[type]); + if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[atype]); p = cgroup_parent(p); } while (p); @@ -249,10 +249,10 @@ static int compute_effective_progs(struct cgroup *cgrp, cnt = 0; p = cgrp; do { - if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) continue; - list_for_each_entry(pl, &p->bpf.progs[type], node) { + list_for_each_entry(pl, &p->bpf.progs[atype], node) { if (!prog_list_prog(pl)) continue; @@ -269,10 +269,10 @@ static int compute_effective_progs(struct cgroup *cgrp, } static void activate_effective_progs(struct cgroup *cgrp, - enum bpf_attach_type type, + enum cgroup_bpf_attach_type atype, struct bpf_prog_array *old_array) { - old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array, + old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array, lockdep_is_held(&cgroup_mutex)); /* free prog array after grace period, since __cgroup_bpf_run_*() * might be still walking the array @@ -328,7 +328,7 @@ cleanup: } static int update_effective_progs(struct cgroup *cgrp, - enum bpf_attach_type type) + enum cgroup_bpf_attach_type atype) { struct cgroup_subsys_state *css; int err; @@ -340,7 +340,7 @@ static int update_effective_progs(struct cgroup *cgrp, if (percpu_ref_is_zero(&desc->bpf.refcnt)) continue; - err = compute_effective_progs(desc, type, &desc->bpf.inactive); + err = compute_effective_progs(desc, atype, &desc->bpf.inactive); if (err) goto cleanup; } @@ -357,7 +357,7 @@ static int update_effective_progs(struct cgroup *cgrp, continue; } - activate_effective_progs(desc, type, desc->bpf.inactive); + activate_effective_progs(desc, atype, desc->bpf.inactive); desc->bpf.inactive = NULL; } @@ -436,11 +436,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, enum bpf_attach_type type, u32 flags) { u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); - struct list_head *progs = &cgrp->bpf.progs[type]; struct bpf_prog *old_prog = NULL; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; + enum cgroup_bpf_attach_type atype; struct bpf_prog_list *pl; + struct list_head *progs; int err; if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) || @@ -454,10 +455,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, /* replace_prog implies BPF_F_REPLACE, and vice versa */ return -EINVAL; - if (!hierarchy_allows_attach(cgrp, type)) + atype = to_cgroup_bpf_attach_type(type); + if (atype < 0) + return -EINVAL; + + progs = &cgrp->bpf.progs[atype]; + + if (!hierarchy_allows_attach(cgrp, atype)) return -EPERM; - if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags) + if (!list_empty(progs) && cgrp->bpf.flags[atype] != saved_flags) /* Disallow attaching non-overridable on top * of existing overridable in this cgroup. * Disallow attaching multi-prog if overridable or none @@ -490,16 +497,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, pl->prog = prog; pl->link = link; bpf_cgroup_storages_assign(pl->storage, storage); - cgrp->bpf.flags[type] = saved_flags; + cgrp->bpf.flags[atype] = saved_flags; - err = update_effective_progs(cgrp, type); + err = update_effective_progs(cgrp, atype); if (err) goto cleanup; if (old_prog) bpf_prog_put(old_prog); else - static_branch_inc(&cgroup_bpf_enabled_key[type]); + static_branch_inc(&cgroup_bpf_enabled_key[atype]); bpf_cgroup_storages_link(new_storage, cgrp, type); return 0; @@ -520,7 +527,7 @@ cleanup: * all descendant cgroups. This function is guaranteed to succeed. */ static void replace_effective_prog(struct cgroup *cgrp, - enum bpf_attach_type type, + enum cgroup_bpf_attach_type atype, struct bpf_cgroup_link *link) { struct bpf_prog_array_item *item; @@ -539,10 +546,10 @@ static void replace_effective_prog(struct cgroup *cgrp, /* find position of link in effective progs array */ for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { - if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) continue; - head = &cg->bpf.progs[type]; + head = &cg->bpf.progs[atype]; list_for_each_entry(pl, head, node) { if (!prog_list_prog(pl)) continue; @@ -554,7 +561,7 @@ static void replace_effective_prog(struct cgroup *cgrp, found: BUG_ON(!cg); progs = rcu_dereference_protected( - desc->bpf.effective[type], + desc->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); item = &progs->items[pos]; WRITE_ONCE(item->prog, link->link.prog); @@ -574,11 +581,18 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, struct bpf_prog *new_prog) { - struct list_head *progs = &cgrp->bpf.progs[link->type]; + enum cgroup_bpf_attach_type atype; struct bpf_prog *old_prog; struct bpf_prog_list *pl; + struct list_head *progs; bool found = false; + atype = to_cgroup_bpf_attach_type(link->type); + if (atype < 0) + return -EINVAL; + + progs = &cgrp->bpf.progs[atype]; + if (link->link.prog->type != new_prog->type) return -EINVAL; @@ -592,7 +606,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, return -ENOENT; old_prog = xchg(&link->link.prog, new_prog); - replace_effective_prog(cgrp, link->type, link); + replace_effective_prog(cgrp, atype, link); bpf_prog_put(old_prog); return 0; } @@ -667,12 +681,20 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type) { - struct list_head *progs = &cgrp->bpf.progs[type]; - u32 flags = cgrp->bpf.flags[type]; - struct bpf_prog_list *pl; + enum cgroup_bpf_attach_type atype; struct bpf_prog *old_prog; + struct bpf_prog_list *pl; + struct list_head *progs; + u32 flags; int err; + atype = to_cgroup_bpf_attach_type(type); + if (atype < 0) + return -EINVAL; + + progs = &cgrp->bpf.progs[atype]; + flags = cgrp->bpf.flags[atype]; + if (prog && link) /* only one of prog or link can be specified */ return -EINVAL; @@ -686,7 +708,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, pl->prog = NULL; pl->link = NULL; - err = update_effective_progs(cgrp, type); + err = update_effective_progs(cgrp, atype); if (err) goto cleanup; @@ -695,10 +717,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, kfree(pl); if (list_empty(progs)) /* last program was detached, reset flags to zero */ - cgrp->bpf.flags[type] = 0; + cgrp->bpf.flags[atype] = 0; if (old_prog) bpf_prog_put(old_prog); - static_branch_dec(&cgroup_bpf_enabled_key[type]); + static_branch_dec(&cgroup_bpf_enabled_key[atype]); return 0; cleanup: @@ -714,13 +736,21 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, { __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); enum bpf_attach_type type = attr->query.attach_type; - struct list_head *progs = &cgrp->bpf.progs[type]; - u32 flags = cgrp->bpf.flags[type]; + enum cgroup_bpf_attach_type atype; struct bpf_prog_array *effective; + struct list_head *progs; struct bpf_prog *prog; int cnt, ret = 0, i; + u32 flags; - effective = rcu_dereference_protected(cgrp->bpf.effective[type], + atype = to_cgroup_bpf_attach_type(type); + if (atype < 0) + return -EINVAL; + + progs = &cgrp->bpf.progs[atype]; + flags = cgrp->bpf.flags[atype]; + + effective = rcu_dereference_protected(cgrp->bpf.effective[atype], lockdep_is_held(&cgroup_mutex)); if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) @@ -925,14 +955,14 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) link->cgroup = cgrp; link->type = attr->link_create.attach_type; - err = bpf_link_prime(&link->link, &link_primer); + err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); goto out_put_cgroup; } - err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type, - BPF_F_ALLOW_MULTI); + err = cgroup_bpf_attach(cgrp, NULL, NULL, link, + link->type, BPF_F_ALLOW_MULTI); if (err) { bpf_link_cleanup(&link_primer); goto out_put_cgroup; @@ -986,7 +1016,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, */ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, - enum bpf_attach_type type) + enum cgroup_bpf_attach_type atype) { unsigned int offset = skb->data - skb_network_header(skb); struct sock *save_sk; @@ -1008,12 +1038,12 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, /* compute pointers for the bpf prog */ bpf_compute_and_save_data_end(skb, &saved_data_end); - if (type == BPF_CGROUP_INET_EGRESS) { + if (atype == CGROUP_INET_EGRESS) { ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY( - cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb); + cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); } else { - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - __bpf_prog_run_save_cb); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, + __bpf_prog_run_save_cb); ret = (ret == 1 ? 0 : -EPERM); } bpf_restore_data_end(skb, saved_data_end); @@ -1038,12 +1068,12 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); * and if it returned != 1 during execution. In all other cases, 0 is returned. */ int __cgroup_bpf_run_filter_sk(struct sock *sk, - enum bpf_attach_type type) + enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); int ret; - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run); return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -1065,7 +1095,7 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); */ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, struct sockaddr *uaddr, - enum bpf_attach_type type, + enum cgroup_bpf_attach_type atype, void *t_ctx, u32 *flags) { @@ -1090,8 +1120,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, } cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - ret = BPF_PROG_RUN_ARRAY_FLAGS(cgrp->bpf.effective[type], &ctx, - BPF_PROG_RUN, flags); + ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run, flags); return ret == 1 ? 0 : -EPERM; } @@ -1115,19 +1145,19 @@ EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); */ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, - enum bpf_attach_type type) + enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); int ret; - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, - BPF_PROG_RUN); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, + bpf_prog_run); return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, - short access, enum bpf_attach_type type) + short access, enum cgroup_bpf_attach_type atype) { struct cgroup *cgrp; struct bpf_cgroup_dev_ctx ctx = { @@ -1135,12 +1165,12 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, .major = major, .minor = minor, }; - int allow = 1; + int allow; rcu_read_lock(); cgrp = task_dfl_cgroup(current); - allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, - BPF_PROG_RUN); + allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, + bpf_prog_run); rcu_read_unlock(); return !allow; @@ -1231,7 +1261,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, char **buf, size_t *pcount, loff_t *ppos, - enum bpf_attach_type type) + enum cgroup_bpf_attach_type atype) { struct bpf_sysctl_kern ctx = { .head = head, @@ -1271,7 +1301,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, rcu_read_lock(); cgrp = task_dfl_cgroup(current); - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run); rcu_read_unlock(); kfree(ctx.cur_val); @@ -1289,7 +1319,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, #ifdef CONFIG_NET static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, - enum bpf_attach_type attach_type) + enum cgroup_bpf_attach_type attach_type) { struct bpf_prog_array *prog_array; bool empty; @@ -1364,7 +1394,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, * attached to the hook so we don't waste time allocating * memory and locking the socket. */ - if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT)) + if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_SETSOCKOPT)) return 0; /* Allocate a bit more than the initial user buffer for @@ -1385,8 +1415,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, } lock_sock(sk); - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT], - &ctx, BPF_PROG_RUN); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], + &ctx, bpf_prog_run); release_sock(sk); if (!ret) { @@ -1460,7 +1490,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, * attached to the hook so we don't waste time allocating * memory and locking the socket. */ - if (__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) + if (__cgroup_bpf_prog_array_is_empty(cgrp, CGROUP_GETSOCKOPT)) return retval; ctx.optlen = max_optlen; @@ -1495,8 +1525,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, } lock_sock(sk); - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], - &ctx, BPF_PROG_RUN); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], + &ctx, bpf_prog_run); release_sock(sk); if (!ret) { @@ -1556,8 +1586,8 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, * be called if that data shouldn't be "exported". */ - ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT], - &ctx, BPF_PROG_RUN); + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], + &ctx, bpf_prog_run); if (!ret) return -EPERM; @@ -1846,15 +1876,41 @@ const struct bpf_verifier_ops cg_sysctl_verifier_ops = { const struct bpf_prog_ops cg_sysctl_prog_ops = { }; +#ifdef CONFIG_NET +BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx) +{ + const struct net *net = ctx ? sock_net(ctx->sk) : &init_net; + + return net->net_cookie; +} + +static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = { + .func = bpf_get_netns_cookie_sockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX_OR_NULL, +}; +#endif + static const struct bpf_func_proto * cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { #ifdef CONFIG_NET + case BPF_FUNC_get_netns_cookie: + return &bpf_get_netns_cookie_sockopt_proto; case BPF_FUNC_sk_storage_get: return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_setsockopt: + if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT) + return &bpf_sk_setsockopt_proto; + return NULL; + case BPF_FUNC_getsockopt: + if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT) + return &bpf_sk_getsockopt_proto; + return NULL; #endif #ifdef CONFIG_INET case BPF_FUNC_tcp_sock: |