diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-01-06 18:07:25 -0800 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-01-06 18:07:26 -0800 |
commit | 257367c0c9d800ef3065b440161596896e179038 (patch) | |
tree | 067926de0948a6a04d115fa9ece37d20e10975f9 /net | |
parent | 710ad98c363a66a0cd8526465426c5c5f8377ee0 (diff) | |
parent | eff14fcd032bc1b403c1716f6823b3c72c58096a (diff) | |
download | linux-257367c0c9d800ef3065b440161596896e179038.tar.bz2 |
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:
====================
pull-request: bpf-next 2022-01-06
We've added 41 non-merge commits during the last 2 day(s) which contain
a total of 36 files changed, 1214 insertions(+), 368 deletions(-).
The main changes are:
1) Various fixes in the verifier, from Kris and Daniel.
2) Fixes in sockmap, from John.
3) bpf_getsockopt fix, from Kuniyuki.
4) INET_POST_BIND fix, from Menglong.
5) arm64 JIT fix for bpf pseudo funcs, from Hou.
6) BPF ISA doc improvements, from Christoph.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (41 commits)
bpf: selftests: Add bind retry for post_bind{4, 6}
bpf: selftests: Use C99 initializers in test_sock.c
net: bpf: Handle return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND()
bpf/selftests: Test bpf_d_path on rdonly_mem.
libbpf: Add documentation for bpf_map batch operations
selftests/bpf: Don't rely on preserving volatile in PT_REGS macros in loop3
xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames
xdp: Move conversion to xdp_frame out of map functions
page_pool: Store the XDP mem id
page_pool: Add callback to init pages when they are allocated
xdp: Allow registering memory model without rxq reference
samples/bpf: xdpsock: Add timestamp for Tx-only operation
samples/bpf: xdpsock: Add time-out for cleaning Tx
samples/bpf: xdpsock: Add sched policy and priority support
samples/bpf: xdpsock: Add cyclic TX operation capability
samples/bpf: xdpsock: Add clockid selection support
samples/bpf: xdpsock: Add Dest and Src MAC setting for Tx-only operation
samples/bpf: xdpsock: Add VLAN support for Tx-only operation
libbpf 1.0: Deprecate bpf_object__find_map_by_offset() API
libbpf 1.0: Deprecate bpf_map__is_offload_neutral()
...
====================
Link: https://lore.kernel.org/r/20220107013626.53943-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/filter.c | 81 | ||||
-rw-r--r-- | net/core/page_pool.c | 6 | ||||
-rw-r--r-- | net/core/sock_map.c | 21 | ||||
-rw-r--r-- | net/core/xdp.c | 94 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/ping.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_bpf.c | 27 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/udp.c | 1 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 2 | ||||
-rw-r--r-- | net/ipv6/ping.c | 1 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 1 | ||||
-rw-r--r-- | net/ipv6/udp.c | 1 |
13 files changed, 189 insertions, 50 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index 606ab5a98a1a..4603b7cd3cd1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3957,10 +3957,35 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_master_redirect); -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) +static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, + struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; + int err; + + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; + + err = __xsk_map_redirect(fwd, xdp); + if (unlikely(err)) + goto err; + + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); + return 0; +err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); + return err; +} + +static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, + struct net_device *dev, + struct xdp_frame *xdpf, + struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; @@ -3970,6 +3995,11 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ ri->map_type = BPF_MAP_TYPE_UNSPEC; + if (unlikely(!xdpf)) { + err = -EOVERFLOW; + goto err; + } + switch (map_type) { case BPF_MAP_TYPE_DEVMAP: fallthrough; @@ -3977,17 +4007,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, map = READ_ONCE(ri->map); if (unlikely(map)) { WRITE_ONCE(ri->map, NULL); - err = dev_map_enqueue_multi(xdp, dev, map, + err = dev_map_enqueue_multi(xdpf, dev, map, ri->flags & BPF_F_EXCLUDE_INGRESS); } else { - err = dev_map_enqueue(fwd, xdp, dev); + err = dev_map_enqueue(fwd, xdpf, dev); } break; case BPF_MAP_TYPE_CPUMAP: - err = cpu_map_enqueue(fwd, xdp, dev); - break; - case BPF_MAP_TYPE_XSKMAP: - err = __xsk_map_redirect(fwd, xdp); + err = cpu_map_enqueue(fwd, xdpf, dev); break; case BPF_MAP_TYPE_UNSPEC: if (map_id == INT_MAX) { @@ -3996,7 +4023,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, err = -EINVAL; break; } - err = dev_xdp_enqueue(fwd, xdp, dev); + err = dev_xdp_enqueue(fwd, xdpf, dev); break; } fallthrough; @@ -4013,8 +4040,34 @@ err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + + if (map_type == BPF_MAP_TYPE_XSKMAP) + return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + + return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), + xdp_prog); +} EXPORT_SYMBOL_GPL(xdp_do_redirect); +int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, + struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + + if (map_type == BPF_MAP_TYPE_XSKMAP) + return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + + return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); +} +EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); + static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, @@ -4741,12 +4794,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, max_t(int, val * 2, SOCK_MIN_SNDBUF)); @@ -4967,6 +5022,12 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname, goto err_clear; switch (optname) { + case SO_RCVBUF: + *((int *)optval) = sk->sk_rcvbuf; + break; + case SO_SNDBUF: + *((int *)optval) = sk->sk_sndbuf; + break; case SO_MARK: *((int *)optval) = sk->sk_mark; break; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 1a6978427d6c..7347d5c7dbe0 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -217,6 +217,8 @@ static void page_pool_set_pp_info(struct page_pool *pool, { page->pp = pool; page->pp_magic |= PP_SIGNATURE; + if (pool->p.init_callback) + pool->p.init_callback(page, pool->p.init_arg); } static void page_pool_clear_pp_info(struct page *page) @@ -691,10 +693,12 @@ static void page_pool_release_retry(struct work_struct *wq) schedule_delayed_work(&pool->release_dw, DEFER_TIME); } -void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)) +void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), + struct xdp_mem_info *mem) { refcount_inc(&pool->user_cnt); pool->disconnect = disconnect; + pool->xdp_mem_id = mem->id; } void page_pool_destroy(struct page_pool *pool) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 9618ab6d7cc9..1827669eedd6 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -292,15 +292,23 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) if (skb_verdict) psock_set_prog(&psock->progs.skb_verdict, skb_verdict); + /* msg_* and stream_* programs references tracked in psock after this + * point. Reference dec and cleanup will occur through psock destructor + */ ret = sock_map_init_proto(sk, psock); - if (ret < 0) - goto out_drop; + if (ret < 0) { + sk_psock_put(sk, psock); + goto out; + } write_lock_bh(&sk->sk_callback_lock); if (stream_parser && stream_verdict && !psock->saved_data_ready) { ret = sk_psock_init_strp(sk, psock); - if (ret) - goto out_unlock_drop; + if (ret) { + write_unlock_bh(&sk->sk_callback_lock); + sk_psock_put(sk, psock); + goto out; + } sk_psock_start_strp(sk, psock); } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) { sk_psock_start_verdict(sk,psock); @@ -309,10 +317,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) } write_unlock_bh(&sk->sk_callback_lock); return 0; -out_unlock_drop: - write_unlock_bh(&sk->sk_callback_lock); -out_drop: - sk_psock_put(sk, psock); out_progs: if (skb_verdict) bpf_prog_put(skb_verdict); @@ -325,6 +329,7 @@ out_put_stream_parser: out_put_stream_verdict: if (stream_verdict) bpf_prog_put(stream_verdict); +out: return ret; } diff --git a/net/core/xdp.c b/net/core/xdp.c index 7fe1df85f505..7aba35504986 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator) mutex_unlock(&mem_id_lock); } -void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) +void xdp_unreg_mem_model(struct xdp_mem_info *mem) { struct xdp_mem_allocator *xa; - int type = xdp_rxq->mem.type; - int id = xdp_rxq->mem.id; + int type = mem->type; + int id = mem->id; /* Reset mem info to defaults */ - xdp_rxq->mem.id = 0; - xdp_rxq->mem.type = 0; - - if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { - WARN(1, "Missing register, driver bug"); - return; - } + mem->id = 0; + mem->type = 0; if (id == 0) return; @@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) rcu_read_unlock(); } } +EXPORT_SYMBOL_GPL(xdp_unreg_mem_model); + +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) +{ + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { + WARN(1, "Missing register, driver bug"); + return; + } + + xdp_unreg_mem_model(&xdp_rxq->mem); +} EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) @@ -259,28 +265,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type) return true; } -int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, - enum xdp_mem_type type, void *allocator) +static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem, + enum xdp_mem_type type, + void *allocator) { struct xdp_mem_allocator *xdp_alloc; gfp_t gfp = GFP_KERNEL; int id, errno, ret; void *ptr; - if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { - WARN(1, "Missing register, driver bug"); - return -EFAULT; - } - if (!__is_supported_mem_type(type)) - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); - xdp_rxq->mem.type = type; + mem->type = type; if (!allocator) { if (type == MEM_TYPE_PAGE_POOL) - return -EINVAL; /* Setup time check page_pool req */ - return 0; + return ERR_PTR(-EINVAL); /* Setup time check page_pool req */ + return NULL; } /* Delay init of rhashtable to save memory if feature isn't used */ @@ -290,13 +292,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, mutex_unlock(&mem_id_lock); if (ret < 0) { WARN_ON(1); - return ret; + return ERR_PTR(ret); } } xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp); if (!xdp_alloc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); mutex_lock(&mem_id_lock); id = __mem_id_cyclic_get(gfp); @@ -304,31 +306,61 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, errno = id; goto err; } - xdp_rxq->mem.id = id; - xdp_alloc->mem = xdp_rxq->mem; + mem->id = id; + xdp_alloc->mem = *mem; xdp_alloc->allocator = allocator; /* Insert allocator into ID lookup table */ ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node); if (IS_ERR(ptr)) { - ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id); - xdp_rxq->mem.id = 0; + ida_simple_remove(&mem_id_pool, mem->id); + mem->id = 0; errno = PTR_ERR(ptr); goto err; } if (type == MEM_TYPE_PAGE_POOL) - page_pool_use_xdp_mem(allocator, mem_allocator_disconnect); + page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem); mutex_unlock(&mem_id_lock); - trace_mem_connect(xdp_alloc, xdp_rxq); - return 0; + return xdp_alloc; err: mutex_unlock(&mem_id_lock); kfree(xdp_alloc); - return errno; + return ERR_PTR(errno); +} + +int xdp_reg_mem_model(struct xdp_mem_info *mem, + enum xdp_mem_type type, void *allocator) +{ + struct xdp_mem_allocator *xdp_alloc; + + xdp_alloc = __xdp_reg_mem_model(mem, type, allocator); + if (IS_ERR(xdp_alloc)) + return PTR_ERR(xdp_alloc); + return 0; +} +EXPORT_SYMBOL_GPL(xdp_reg_mem_model); + +int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator) +{ + struct xdp_mem_allocator *xdp_alloc; + + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { + WARN(1, "Missing register, driver bug"); + return -EFAULT; + } + + xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator); + if (IS_ERR(xdp_alloc)) + return PTR_ERR(xdp_alloc); + + trace_mem_connect(xdp_alloc, xdp_rxq); + return 0; } + EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); /* XDP RX runs under NAPI protection, and in different delivery error diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f53184767ee7..9c465bac1eb0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -531,6 +531,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); if (err) { inet->inet_saddr = inet->inet_rcv_saddr = 0; + if (sk->sk_prot->put_port) + sk->sk_prot->put_port(sk); goto out_release_sock; } } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e540b0dcf085..0e56df3a45e2 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -994,6 +994,7 @@ struct proto ping_prot = { .hash = ping_hash, .unhash = ping_unhash, .get_port = ping_get_port, + .put_port = ping_unhash, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index f70aa0932bd6..9b9b02052fd3 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -196,12 +196,39 @@ msg_bytes_ready: long timeo; int data; + if (sock_flag(sk, SOCK_DONE)) + goto out; + + if (sk->sk_err) { + copied = sock_error(sk); + goto out; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN) + goto out; + + if (sk->sk_state == TCP_CLOSE) { + copied = -ENOTCONN; + goto out; + } + timeo = sock_rcvtimeo(sk, nonblock); + if (!timeo) { + copied = -EAGAIN; + goto out; + } + + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + goto out; + } + data = tcp_msg_wait_data(sk, psock, timeo); if (data && !sk_psock_queue_empty(psock)) goto msg_bytes_ready; copied = -EAGAIN; } +out: release_sock(sk); sk_psock_put(sk, psock); return copied; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ac10e4cdd8d0..9861786b8336 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3076,6 +3076,7 @@ struct proto tcp_prot = { .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, + .put_port = inet_put_port, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = tcp_bpf_update_proto, #endif diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7b18a6f42f18..c2a4411d2b04 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2927,6 +2927,7 @@ struct proto udp_prot = { .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, + .put_port = udp_lib_unhash, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d1636425654e..8fe7900f1949 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -413,6 +413,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); + if (sk->sk_prot->put_port) + sk->sk_prot->put_port(sk); goto out; } } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 6ac88fe24a8e..9256f6ba87ef 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -177,6 +177,7 @@ struct proto pingv6_prot = { .hash = ping_hash, .unhash = ping_unhash, .get_port = ping_get_port, + .put_port = ping_unhash, .obj_size = sizeof(struct raw6_sock), }; EXPORT_SYMBOL_GPL(pingv6_prot); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1ac243d18c2b..075ee8a2df3b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2181,6 +2181,7 @@ struct proto tcpv6_prot = { .hash = inet6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, + .put_port = inet_put_port, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = tcp_bpf_update_proto, #endif diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index df216268cb02..528b81ef19c9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1733,6 +1733,7 @@ struct proto udpv6_prot = { .unhash = udp_lib_unhash, .rehash = udp_v6_rehash, .get_port = udp_v6_get_port, + .put_port = udp_lib_unhash, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = udp_bpf_update_proto, #endif |