commit fceb07950a7aac43d52d8c6ef580399a8b9b68fe
tree 90953da74ef7db8fea74c57844931e79c3e9f2de
parent 9758aba8542bb43029d077303d05df1d00a8dbb5
parent b2c4618162ec615a15883a804cce7e27afecfa58
author Jakub Kicinski <kuba@kernel.org> 2021-11-09 15:44:47 -0800
committer Jakub Kicinski <kuba@kernel.org> 2021-11-09 15:44:48 -0800
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Alexei Starovoitov says:
====================
pull-request: bpf 2021-11-09
We've added 7 non-merge commits during the last 3 day(s) which contain
a total of 10 files changed, 174 insertions(+), 48 deletions(-).
The main changes are:
1) Various sockmap fixes, from John and Jussi.
2) Fix out-of-bound issue with bpf_pseudo_func, from Martin.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf, sockmap: sk_skb data_end access incorrect when src_reg = dst_reg
bpf: sockmap, strparser, and tls are reusing qdisc_skb_cb and colliding
bpf, sockmap: Fix race in ingress receive verdict with redirect to self
bpf, sockmap: Remove unhash handler for BPF sockmap usage
bpf, sockmap: Use stricter sk state checks in sk_lookup_assign
bpf: selftest: Trigger a DCE on the whole subprog
bpf: Stop caching subprog index in the bpf_pseudo_func insn
====================
Link: https://lore.kernel.org/r/20211109215702.38350-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
 include/linux/bpf.h                                         |  6
 include/linux/skmsg.h                                       | 12
 include/net/strparser.h                                     | 20
 kernel/bpf/core.c                                           |  7
 kernel/bpf/verifier.c                                       | 37
 net/core/filter.c                                           | 64
 net/core/sock_map.c                                         |  6
 net/ipv4/tcp_bpf.c                                          | 48
 net/strparser/strparser.c                                   | 10
 tools/testing/selftests/bpf/progs/for_each_array_map_elem.c | 12
 10 files changed, 174 insertions(+), 48 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2be6dfd68df9..f715e8863f4d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -484,6 +484,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
 	aux->ctx_field_size = size;
 }
 
+static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+	       insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 struct bpf_prog_ops {
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index b4256847c707..584d94be9c8b 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -507,6 +507,18 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 	return !!psock->saved_data_ready;
 }
 
+static inline bool sk_is_tcp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_DGRAM &&
+	       sk->sk_protocol == IPPROTO_UDP;
+}
+
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
 #define BPF_F_STRPARSER	(1UL << 1)
diff --git a/include/net/strparser.h b/include/net/strparser.h
index 1d20b98493a1..732b7097d78e 100644
--- a/include/net/strparser.h
+++ b/include/net/strparser.h
@@ -54,10 +54,28 @@ struct strp_msg {
 	int offset;
 };
 
+struct _strp_msg {
+	/* Internal cb structure. struct strp_msg must be first for passing
+	 * to upper layer.
+	 */
+	struct strp_msg strp;
+	int accum_len;
+};
+
+struct sk_skb_cb {
+#define SK_SKB_CB_PRIV_LEN 20
+	unsigned char data[SK_SKB_CB_PRIV_LEN];
+	struct _strp_msg strp;
+	/* temp_reg is a temporary register used for bpf_convert_data_end_access
+	 * when dst_reg == src_reg.
+	 */
+	u64 temp_reg;
+};
+
 static inline struct strp_msg *strp_msg(struct sk_buff *skb)
 {
 	return (struct strp_msg *)((void *)skb->cb +
-		offsetof(struct qdisc_skb_cb, data));
+		offsetof(struct sk_skb_cb, strp));
 }
 
 /* Structure for an attached lower socket */
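The sk_skb_cb added above has to fit inside skb->cb[], the 48-byte scratch area shared by all control-block users. The following standalone check is illustrative only, not part of the patch (struct strp_msg's fields are taken from the existing strparser header), and compiles as plain C11:

#include <assert.h>

struct strp_msg {
	int full_len;
	int offset;
};

struct _strp_msg {
	struct strp_msg strp;	/* must stay first, see comment above */
	int accum_len;
};

struct sk_skb_cb {
	unsigned char data[20];		/* SK_SKB_CB_PRIV_LEN */
	struct _strp_msg strp;
	unsigned long long temp_reg;	/* u64 in the kernel */
};

int main(void)
{
	/* 20 bytes of private data + 12 bytes of strparser state put
	 * temp_reg at offset 32 (already 8-byte aligned): 40 <= 48.
	 */
	static_assert(sizeof(struct sk_skb_cb) <= 48,
		      "sk_skb_cb must fit in skb->cb[48]");
	return 0;
}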
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 327e3996eadb..2405e39d800f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -390,6 +390,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
 			i = end_new;
 			insn = prog->insnsi + end_old;
 		}
+		if (bpf_pseudo_func(insn)) {
+			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
+						   end_new, i, probe_pass);
+			if (ret)
+				return ret;
+			continue;
+		}
 		code = insn->code;
 		if ((BPF_CLASS(code) != BPF_JMP &&
 		     BPF_CLASS(code) != BPF_JMP32) ||
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5f8d9128860a..890b3ec375a3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 }
 
-static bool bpf_pseudo_func(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
-	       insn->src_reg == BPF_PSEUDO_FUNC;
-}
-
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
@@ -1960,16 +1954,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			return -EPERM;
 		}
 
-		if (bpf_pseudo_func(insn)) {
+		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
 			ret = add_subprog(env, i + insn->imm + 1);
-			if (ret >= 0)
-				/* remember subprog */
-				insn[1].imm = ret;
-		} else if (bpf_pseudo_call(insn)) {
-			ret = add_subprog(env, i + insn->imm + 1);
-		} else {
+		else
 			ret = add_kfunc_call(env, insn->imm, insn->off);
-		}
 
 		if (ret < 0)
 			return ret;
@@ -9387,7 +9375,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
 		struct bpf_prog_aux *aux = env->prog->aux;
-		u32 subprogno = insn[1].imm;
+		u32 subprogno = find_subprog(env,
+					     env->insn_idx + insn->imm + 1);
 
 		if (!aux->func_info) {
 			verbose(env, "missing btf func_info\n");
@@ -12557,14 +12546,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (bpf_pseudo_func(insn)) {
-			env->insn_aux_data[i].call_imm = insn->imm;
-			/* subprog is encoded in insn[1].imm */
+		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
 			continue;
-		}
-
-		if (!bpf_pseudo_call(insn))
-			continue;
+
 		/* Upon error here we cannot fall back to interpreter but
 		 * need a hard reject of the program. Thus -EFAULT is
 		 * propagated in any case.
@@ -12585,6 +12569,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		env->insn_aux_data[i].call_imm = insn->imm;
 		/* point imm to __bpf_call_base+1 from JITs point of view */
 		insn->imm = 1;
+		if (bpf_pseudo_func(insn))
+			/* jit (e.g. x86_64) may emit fewer instructions
+			 * if it learns a u32 imm is the same as a u64 imm.
+			 * Force a non zero here.
+			 */
+			insn[1].imm = 1;
 	}
 
 	err = bpf_prog_alloc_jited_linfo(prog);
@@ -12669,7 +12659,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
 			if (bpf_pseudo_func(insn)) {
-				subprog = insn[1].imm;
+				subprog = insn->off;
 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
 				continue;
@@ -12720,7 +12710,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 		if (bpf_pseudo_func(insn)) {
 			insn[0].imm = env->insn_aux_data[i].call_imm;
-			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
+			insn[1].imm = insn->off;
+			insn->off = 0;
 			continue;
 		}
 		if (!bpf_pseudo_call(insn))
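Together, the core.c and verifier.c changes stop caching the subprog index in insn[1].imm: bpf_adj_branches() now adjusts the pseudo-func delta like any other relative reference, add_subprog_and_kfunc() and check_ld_imm() recompute the index from the delta, and jit_subprogs() stashes it in insn->off only while JITing, clearing it again afterwards. For reference, a BPF_PSEUDO_FUNC load is a two-slot ld_imm64; the user-space sketch below (the helper name is made up for illustration) shows the encoding that find_subprog() now resolves:

#include <linux/bpf.h>
#include <string.h>

/* Illustrative only: emit the two-slot ld_imm64 that refers to the
 * subprog starting at this_insn_idx + delta + 1.  The verifier and
 * JIT later overwrite both imm halves with the subprog's address.
 */
static void emit_pseudo_func(struct bpf_insn insn[2], int delta)
{
	memset(insn, 0, 2 * sizeof(insn[0]));
	insn[0].code    = BPF_LD | BPF_IMM | BPF_DW;
	insn[0].dst_reg = BPF_REG_1;
	insn[0].src_reg = BPF_PSEUDO_FUNC;
	insn[0].imm     = delta;	/* what find_subprog() consumes */
	/* insn[1] stays zero: it is the upper half of the 64-bit imm */
}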
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e8d3b49c297..e471c9b09670 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9756,22 +9756,46 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
 static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
 						    struct bpf_insn *insn)
 {
-	/* si->dst_reg = skb->data */
+	int reg;
+	int temp_reg_off = offsetof(struct sk_buff, cb) +
+			   offsetof(struct sk_skb_cb, temp_reg);
+
+	if (si->src_reg == si->dst_reg) {
+		/* We need an extra register, choose and save a register. */
+		reg = BPF_REG_9;
+		if (si->src_reg == reg || si->dst_reg == reg)
+			reg--;
+		if (si->src_reg == reg || si->dst_reg == reg)
+			reg--;
+		*insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
+	} else {
+		reg = si->dst_reg;
+	}
+
+	/* reg = skb->data */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
-			      si->dst_reg, si->src_reg,
+			      reg, si->src_reg,
 			      offsetof(struct sk_buff, data));
 	/* AX = skb->len */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
 			      BPF_REG_AX, si->src_reg,
 			      offsetof(struct sk_buff, len));
-	/* si->dst_reg = skb->data + skb->len */
-	*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+	/* reg = skb->data + skb->len */
+	*insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
 	/* AX = skb->data_len */
 	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
 			      BPF_REG_AX, si->src_reg,
 			      offsetof(struct sk_buff, data_len));
-	/* si->dst_reg = skb->data + skb->len - skb->data_len */
-	*insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX);
+
+	/* reg = skb->data + skb->len - skb->data_len */
+	*insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
+
+	if (si->src_reg == si->dst_reg) {
+		/* Restore the saved register */
+		*insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
+		*insn++ = BPF_MOV64_REG(si->dst_reg, reg);
+		*insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
+	}
 
 	return insn;
 }
@@ -9782,11 +9806,33 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
 				     struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
+	int off;
 
 	switch (si->off) {
 	case offsetof(struct __sk_buff, data_end):
 		insn = bpf_convert_data_end_access(si, insn);
 		break;
+	case offsetof(struct __sk_buff, cb[0]) ...
+	     offsetofend(struct __sk_buff, cb[4]) - 1:
+		BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
+		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+			      offsetof(struct sk_skb_cb, data)) %
+			     sizeof(__u64));
+
+		prog->cb_access = 1;
+		off  = si->off;
+		off -= offsetof(struct __sk_buff, cb[0]);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct sk_skb_cb, data);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
+					      si->src_reg, off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
+					      si->src_reg, off);
+		break;
+
 	default:
 		return bpf_convert_ctx_access(type, si, insn_buf, prog,
 					      target_size);
@@ -10423,8 +10469,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
 		return -EINVAL;
 	if (unlikely(sk && sk_is_refcounted(sk)))
 		return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
-	if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
-		return -ESOCKTNOSUPPORT; /* reject connected sockets */
+	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
+		return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
+	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
+		return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
 
 	/* Check if socket is suitable for packet L3/L4 protocol */
 	if (sk && sk->sk_protocol != ctx->protocol)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e252b8ec2b85..f39ef79ced67 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -511,12 +511,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
 	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
 }
 
-static bool sk_is_tcp(const struct sock *sk)
-{
-	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
-}
-
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
 	if (sk_is_tcp(sk))
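Both filter.c conversions are exercised by any sk_skb program that touches data_end or cb[]. A minimal sketch of such a program, assuming clang and libbpf (the section name is one libbpf accepts; the 4-byte bound is arbitrary):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("sk_skb/stream_verdict")
int prog_stream_verdict(struct __sk_buff *skb)
{
	void *data     = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;

	/* This data_end read is what bpf_convert_data_end_access()
	 * rewrites; whether the compiler picks dst_reg == src_reg
	 * (the previously miscompiled shape) is out of our hands.
	 */
	if (data + 4 > data_end)
		return SK_DROP;

	/* cb[] accesses now land in sk_skb_cb::data instead of
	 * colliding with the strparser state.
	 */
	skb->cb[0] = 1;
	return SK_PASS;
}

Since the register assignment is the compiler's choice, the rewrite has to handle both the distinct-register and the aliased-register shapes, which is why the aliased case spills a scratch register into sk_skb_cb::temp_reg.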
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 5f4d6f45d87f..f70aa0932bd6 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -172,6 +172,41 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
 	return ret;
 }
 
+static int tcp_bpf_recvmsg_parser(struct sock *sk,
+				  struct msghdr *msg,
+				  size_t len,
+				  int nonblock,
+				  int flags,
+				  int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	lock_sock(sk);
+msg_bytes_ready:
+	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	if (!copied) {
+		long timeo;
+		int data;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = tcp_msg_wait_data(sk, psock, timeo);
+		if (data && !sk_psock_queue_empty(psock))
+			goto msg_bytes_ready;
+		copied = -EAGAIN;
+	}
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied;
+}
+
 static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			   int nonblock, int flags, int *addr_len)
 {
@@ -464,6 +499,8 @@ enum {
 enum {
 	TCP_BPF_BASE,
 	TCP_BPF_TX,
+	TCP_BPF_RX,
+	TCP_BPF_TXRX,
 	TCP_BPF_NUM_CFGS,
 };
 
@@ -475,7 +512,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 				   struct proto *base)
 {
 	prot[TCP_BPF_BASE]			= *base;
-	prot[TCP_BPF_BASE].unhash		= sock_map_unhash;
 	prot[TCP_BPF_BASE].close		= sock_map_close;
 	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
 	prot[TCP_BPF_BASE].sock_is_readable	= sk_msg_is_readable;
@@ -483,6 +519,12 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
 	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
 	prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
+
+	prot[TCP_BPF_RX]			= prot[TCP_BPF_BASE];
+	prot[TCP_BPF_RX].recvmsg		= tcp_bpf_recvmsg_parser;
+
+	prot[TCP_BPF_TXRX]			= prot[TCP_BPF_TX];
+	prot[TCP_BPF_TXRX].recvmsg		= tcp_bpf_recvmsg_parser;
 }
 
 static void tcp_bpf_check_v6_needs_rebuild(struct proto *ops)
@@ -520,6 +562,10 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
 	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
 	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
 
+	if (psock->progs.stream_verdict || psock->progs.skb_verdict) {
+		config = (config == TCP_BPF_TX) ? TCP_BPF_TXRX : TCP_BPF_RX;
+	}
+
 	if (restore) {
 		if (inet_csk_has_ulp(sk)) {
 			/* TLS does not have an unhash proto in SW cases,
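With the new TCP_BPF_RX/TCP_BPF_TXRX slots, any socket that has a stream or skb verdict program installed gets tcp_bpf_recvmsg_parser() as its recvmsg. A user-space sketch of the pattern this protects, assuming map_fd is a BPF_MAP_TYPE_SOCKMAP with a verdict program already attached and sock_fd a connected TCP socket:

#include <sys/socket.h>
#include <bpf/bpf.h>

static ssize_t read_via_psock(int map_fd, int sock_fd,
			      char *buf, size_t len)
{
	int key = 0;

	if (bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY))
		return -1;

	/* This recv() is now served by tcp_bpf_recvmsg_parser(),
	 * which checks the psock ingress queue under the socket
	 * lock, so data a verdict program redirects back to this
	 * socket cannot race past the reader.
	 */
	return recv(sock_fd, buf, len, 0);
}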
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 9c0343568d2a..1a72c67afed5 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -27,18 +27,10 @@
 
 static struct workqueue_struct *strp_wq;
 
-struct _strp_msg {
-	/* Internal cb structure. struct strp_msg must be first for passing
-	 * to upper layer.
-	 */
-	struct strp_msg strp;
-	int accum_len;
-};
-
 static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
 {
 	return (struct _strp_msg *)((void *)skb->cb +
-		offsetof(struct qdisc_skb_cb, data));
+		offsetof(struct sk_skb_cb, strp));
 }
 
 /* Lower lock held */
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
index df918b2469da..52f6995ff29c 100644
--- a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -23,6 +23,16 @@ struct callback_ctx {
 	int output;
 };
 
+const volatile int bypass_unused = 1;
+
+static __u64
+unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val,
+	       struct callback_ctx *data)
+{
+	data->output = 0;
+	return 1;
+}
+
 static __u64
 check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
 		 struct callback_ctx *data)
@@ -54,6 +64,8 @@ int test_pkt_access(struct __sk_buff *skb)
 
 	data.output = 0;
 	bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+	if (!bypass_unused)
+		bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0);
 	arraymap_output = data.output;
 
 	bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
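In the updated selftest, const volatile int bypass_unused = 1 is frozen into .rodata before verification, so the verifier proves the if (!bypass_unused) branch dead and eliminates unused_subprog entirely, including its BPF_PSEUDO_FUNC ld_imm64. That whole-subprog DCE is exactly the instruction-adjustment path fixed in core.c and verifier.c above. A loader could flip the flag before load to keep the subprog alive instead; a minimal sketch, assuming the bpftool-generated skeleton for this object:

#include "for_each_array_map_elem.skel.h"

int main(void)
{
	struct for_each_array_map_elem *skel;
	int err;

	skel = for_each_array_map_elem__open();
	if (!skel)
		return 1;

	/* 0 keeps the unused_subprog call reachable, so the verifier
	 * must process the callback instead of dead-code eliminating it.
	 */
	skel->rodata->bypass_unused = 0;

	err = for_each_array_map_elem__load(skel);
	for_each_array_map_elem__destroy(skel);
	return err ? 1 : 0;
}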