From 5db7c8b9f9fc2aeec671ae3ca6375752c162e0e7 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 18 Jun 2019 23:07:36 +0300 Subject: ipvs: fix tinfo memory leak in start_sync_thread syzkaller reports for memory leak in start_sync_thread [1] As Eric points out, kthread may start and stop before the threadfn function is called, so there is no chance the data (tinfo in our case) to be released in thread. Fix this by releasing tinfo in the controlling code instead. [1] BUG: memory leak unreferenced object 0xffff8881206bf700 (size 32): comm "syz-executor761", pid 7268, jiffies 4294943441 (age 20.470s) hex dump (first 32 bytes): 00 40 7c 09 81 88 ff ff 80 45 b8 21 81 88 ff ff .@|......E.!.... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000057619e23>] kmemleak_alloc_recursive include/linux/kmemleak.h:55 [inline] [<0000000057619e23>] slab_post_alloc_hook mm/slab.h:439 [inline] [<0000000057619e23>] slab_alloc mm/slab.c:3326 [inline] [<0000000057619e23>] kmem_cache_alloc_trace+0x13d/0x280 mm/slab.c:3553 [<0000000086ce5479>] kmalloc include/linux/slab.h:547 [inline] [<0000000086ce5479>] start_sync_thread+0x5d2/0xe10 net/netfilter/ipvs/ip_vs_sync.c:1862 [<000000001a9229cc>] do_ip_vs_set_ctl+0x4c5/0x780 net/netfilter/ipvs/ip_vs_ctl.c:2402 [<00000000ece457c8>] nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] [<00000000ece457c8>] nf_setsockopt+0x4c/0x80 net/netfilter/nf_sockopt.c:115 [<00000000942f62d4>] ip_setsockopt net/ipv4/ip_sockglue.c:1258 [inline] [<00000000942f62d4>] ip_setsockopt+0x9b/0xb0 net/ipv4/ip_sockglue.c:1238 [<00000000a56a8ffd>] udp_setsockopt+0x4e/0x90 net/ipv4/udp.c:2616 [<00000000fa895401>] sock_common_setsockopt+0x38/0x50 net/core/sock.c:3130 [<0000000095eef4cf>] __sys_setsockopt+0x98/0x120 net/socket.c:2078 [<000000009747cf88>] __do_sys_setsockopt net/socket.c:2089 [inline] [<000000009747cf88>] __se_sys_setsockopt net/socket.c:2086 [inline] [<000000009747cf88>] __x64_sys_setsockopt+0x26/0x30 net/socket.c:2086 [<00000000ded8ba80>] do_syscall_64+0x76/0x1a0 arch/x86/entry/common.c:301 [<00000000893b4ac8>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reported-by: syzbot+7e2e50c8adfccd2e5041@syzkaller.appspotmail.com Suggested-by: Eric Biggers Fixes: 998e7a76804b ("ipvs: Use kthread_run() instead of doing a double-fork via kernel_thread()") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2ac40135b576..b36a1df93e7c 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -808,11 +808,12 @@ struct ipvs_master_sync_state { struct ip_vs_sync_buff *sync_buff; unsigned long sync_queue_len; unsigned int sync_queue_delay; - struct task_struct *master_thread; struct delayed_work master_wakeup_work; struct netns_ipvs *ipvs; }; +struct ip_vs_sync_thread_data; + /* How much time to keep dests in trash */ #define IP_VS_DEST_TRASH_PERIOD (120 * HZ) @@ -943,7 +944,8 @@ struct netns_ipvs { spinlock_t sync_lock; struct ipvs_master_sync_state *ms; spinlock_t sync_buff_lock; - struct task_struct **backup_threads; + struct ip_vs_sync_thread_data *master_tinfo; + struct ip_vs_sync_thread_data *backup_tinfo; int threads_mask; volatile int sync_state; struct mutex sync_mutex; -- cgit v1.2.3 From b60a77386b1d4868f72f6353d35dabe5fbe981f2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jun 2019 20:40:45 +0200 Subject: net: make skb_dst_force return true when dst is refcounted netfilter did not expect that skb_dst_force() can cause skb to lose its dst entry. I got a bug report with a skb->dst NULL dereference in netfilter output path. The backtrace contains nf_reinject(), so the dst might have been cleared when skb got queued to userspace. Other users were fixed via if (skb_dst(skb)) { skb_dst_force(skb); if (!skb_dst(skb)) goto handle_err; } But I think its preferable to make the 'dst might be cleared' part of the function explicit. In netfilter case, skb with a null dst is expected when queueing in prerouting hook, so drop skb for the other hooks. v2: v1 of this patch returned true in case skb had no dst entry. Eric said: Say if we have two skb_dst_force() calls for some reason on the same skb, only the first one will return false. This now returns false even when skb had no dst, as per Erics suggestion, so callers might need to check skb_dst() first before skb_dst_force(). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/dst.h | 5 ++++- net/netfilter/nf_queue.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 12b31c602cb0..f8206d3fed2f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -302,8 +302,9 @@ static inline bool dst_hold_safe(struct dst_entry *dst) * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. + * Returns true if dst is refcounted. */ -static inline void skb_dst_force(struct sk_buff *skb) +static inline bool skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); @@ -314,6 +315,8 @@ static inline void skb_dst_force(struct sk_buff *skb) skb->_skb_refdst = (unsigned long)dst; } + + return skb->_skb_refdst != 0UL; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b5b2be55ca82..2c440015ff0c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -190,6 +190,11 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } + if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) { + status = -ENETDOWN; + goto err; + } + *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, @@ -198,7 +203,6 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, }; nf_queue_entry_get_refs(entry); - skb_dst_force(skb); switch (entry->state.pf) { case AF_INET: -- cgit v1.2.3 From 40f6a2cb9cfc5da713f745b23bcc2c6761e5eb5e Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Mon, 1 Jul 2019 17:25:39 +0530 Subject: net: dst.h: Fix shifting signed 32-bit value by 31 bits problem Fix DST_FEATURE_ECN_CA to use "U" cast to avoid shifting signed 32-bit value by 31 bits problem. Signed-off-by: Vandana BN Signed-off-by: David S. Miller --- include/net/dst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index f8206d3fed2f..fe62fe2eb781 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -183,7 +183,7 @@ static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) } /* Kernel-internal feature bits that are unallocated in user space. */ -#define DST_FEATURE_ECN_CA (1 << 31) +#define DST_FEATURE_ECN_CA (1U << 31) #define DST_FEATURE_MASK (DST_FEATURE_ECN_CA) #define DST_FEATURE_ECN_MASK (DST_FEATURE_ECN_CA | RTAX_FEATURE_ECN) -- cgit v1.2.3 From 88405680ec57c35f5886dbb81b3f6f638f74f40d Mon Sep 17 00:00:00 2001 From: Vandana BN Date: Mon, 1 Jul 2019 19:46:10 +0530 Subject: net:gue.h:Fix shifting signed 32-bit value by 31 bits problem Fix GUE_PFLAG_REMCSUM to use "U" cast to avoid shifting signed 32-bit value by 31 bits problem. Signed-off-by: Vandana BN Signed-off-by: David S. Miller --- include/net/gue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/gue.h b/include/net/gue.h index fdad41469b65..3a6595bfa641 100644 --- a/include/net/gue.h +++ b/include/net/gue.h @@ -60,7 +60,7 @@ struct guehdr { /* Private flags in the private option extension */ -#define GUE_PFLAG_REMCSUM htonl(1 << 31) +#define GUE_PFLAG_REMCSUM htonl(1U << 31) #define GUE_PLEN_REMCSUM 4 #define GUE_PFLAGS_ALL (GUE_PFLAG_REMCSUM) -- cgit v1.2.3 From acd3e96d53a24d219f720ed4012b62723ae05da1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 28 Jun 2019 16:11:39 -0700 Subject: net/tls: make sure offload also gets the keys wiped Commit 86029d10af18 ("tls: zero the crypto information from tls_context before freeing") added memzero_explicit() calls to clear the key material before freeing struct tls_context, but it missed tls_device.c has its own way of freeing this structure. Replace the missing free. Fixes: 86029d10af18 ("tls: zero the crypto information from tls_context before freeing") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- include/net/tls.h | 1 + net/tls/tls_device.c | 2 +- net/tls/tls_main.c | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tls.h b/include/net/tls.h index 53d96bca220d..889df0312cd1 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -313,6 +313,7 @@ struct tls_offload_context_rx { (ALIGN(sizeof(struct tls_offload_context_rx), sizeof(void *)) + \ TLS_DRIVER_STATE_SIZE) +void tls_ctx_free(struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 397990407ed6..eb8f24f420f0 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx) if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); - kfree(ctx); + tls_ctx_free(ctx); } static void tls_device_gc_task(struct work_struct *work) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index e2b69e805d46..4674e57e66b0 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -251,7 +251,7 @@ static void tls_write_space(struct sock *sk) ctx->sk_write_space(sk); } -static void tls_ctx_free(struct tls_context *ctx) +void tls_ctx_free(struct tls_context *ctx) { if (!ctx) return; @@ -643,7 +643,7 @@ static void tls_hw_sk_destruct(struct sock *sk) ctx->sk_destruct(sk); /* Free ctx */ - kfree(ctx); + tls_ctx_free(ctx); icsk->icsk_ulp_data = NULL; } -- cgit v1.2.3 From 455302d1c9ae9318660aaeb9748a01ff414c9741 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 28 Jun 2019 11:04:07 +0300 Subject: xdp: fix hang while unregistering device bound to xdp socket Device that bound to XDP socket will not have zero refcount until the userspace application will not close it. This leads to hang inside 'netdev_wait_allrefs()' if device unregistering requested: # ip link del p1 < hang on recvmsg on netlink socket > # ps -x | grep ip 5126 pts/0 D+ 0:00 ip link del p1 # journalctl -b Jun 05 07:19:16 kernel: unregister_netdevice: waiting for p1 to become free. Usage count = 1 Jun 05 07:19:27 kernel: unregister_netdevice: waiting for p1 to become free. Usage count = 1 ... Fix that by implementing NETDEV_UNREGISTER event notification handler to properly clean up all the resources and unref device. This should also allow socket killing via ss(8) utility. Fixes: 965a99098443 ("xsk: add support for bind for Rx") Signed-off-by: Ilya Maximets Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 5 +++ net/xdp/xdp_umem.c | 10 +++--- net/xdp/xdp_umem.h | 1 + net/xdp/xsk.c | 87 +++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 87 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index d074b6d60f8a..7da155164947 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -61,6 +61,11 @@ struct xdp_sock { struct xsk_queue *tx ____cacheline_aligned_in_smp; struct list_head list; bool zc; + enum { + XSK_READY = 0, + XSK_BOUND, + XSK_UNBOUND, + } state; /* Protects multiple processes in the control path */ struct mutex mutex; /* Mutual exclusion of NAPI TX thread and sendmsg error paths diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 267b82a4cbcf..20c91f02d3d8 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -140,11 +140,13 @@ out_rtnl_unlock: return err; } -static void xdp_umem_clear_dev(struct xdp_umem *umem) +void xdp_umem_clear_dev(struct xdp_umem *umem) { struct netdev_bpf bpf; int err; + ASSERT_RTNL(); + if (!umem->dev) return; @@ -153,17 +155,13 @@ static void xdp_umem_clear_dev(struct xdp_umem *umem) bpf.xsk.umem = NULL; bpf.xsk.queue_id = umem->queue_id; - rtnl_lock(); err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf); - rtnl_unlock(); if (err) WARN(1, "failed to disable umem!\n"); } - rtnl_lock(); xdp_clear_umem_at_qid(umem->dev, umem->queue_id); - rtnl_unlock(); dev_put(umem->dev); umem->dev = NULL; @@ -195,7 +193,9 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem) { + rtnl_lock(); xdp_umem_clear_dev(umem); + rtnl_unlock(); ida_simple_remove(&umem_ida, umem->id); diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index 27603227601b..a63a9fb251f5 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -10,6 +10,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, u16 queue_id, u16 flags); +void xdp_umem_clear_dev(struct xdp_umem *umem); bool xdp_umem_validate_queues(struct xdp_umem *umem); void xdp_get_umem(struct xdp_umem *umem); void xdp_put_umem(struct xdp_umem *umem); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index a14e8864e4fa..f53a6ef7c155 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -335,6 +335,22 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, return 0; } +static void xsk_unbind_dev(struct xdp_sock *xs) +{ + struct net_device *dev = xs->dev; + + if (!dev || xs->state != XSK_BOUND) + return; + + xs->state = XSK_UNBOUND; + + /* Wait for driver to stop using the xdp socket. */ + xdp_del_sk_umem(xs->umem, xs); + xs->dev = NULL; + synchronize_net(); + dev_put(dev); +} + static int xsk_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -354,15 +370,7 @@ static int xsk_release(struct socket *sock) sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); - if (xs->dev) { - struct net_device *dev = xs->dev; - - /* Wait for driver to stop using the xdp socket. */ - xdp_del_sk_umem(xs->umem, xs); - xs->dev = NULL; - synchronize_net(); - dev_put(dev); - } + xsk_unbind_dev(xs); xskq_destroy(xs->rx); xskq_destroy(xs->tx); @@ -412,7 +420,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) return -EINVAL; mutex_lock(&xs->mutex); - if (xs->dev) { + if (xs->state != XSK_READY) { err = -EBUSY; goto out_release; } @@ -492,6 +500,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) out_unlock: if (err) dev_put(dev); + else + xs->state = XSK_BOUND; out_release: mutex_unlock(&xs->mutex); return err; @@ -520,6 +530,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; mutex_lock(&xs->mutex); + if (xs->state != XSK_READY) { + mutex_unlock(&xs->mutex); + return -EBUSY; + } q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx; err = xsk_init_queue(entries, q, false); mutex_unlock(&xs->mutex); @@ -534,7 +548,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; mutex_lock(&xs->mutex); - if (xs->umem) { + if (xs->state != XSK_READY || xs->umem) { mutex_unlock(&xs->mutex); return -EBUSY; } @@ -561,6 +575,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; mutex_lock(&xs->mutex); + if (xs->state != XSK_READY) { + mutex_unlock(&xs->mutex); + return -EBUSY; + } if (!xs->umem) { mutex_unlock(&xs->mutex); return -EINVAL; @@ -662,6 +680,9 @@ static int xsk_mmap(struct file *file, struct socket *sock, unsigned long pfn; struct page *qpg; + if (xs->state != XSK_READY) + return -EBUSY; + if (offset == XDP_PGOFF_RX_RING) { q = READ_ONCE(xs->rx); } else if (offset == XDP_PGOFF_TX_RING) { @@ -693,6 +714,38 @@ static int xsk_mmap(struct file *file, struct socket *sock, size, vma->vm_page_prot); } +static int xsk_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct sock *sk; + + switch (msg) { + case NETDEV_UNREGISTER: + mutex_lock(&net->xdp.lock); + sk_for_each(sk, &net->xdp.list) { + struct xdp_sock *xs = xdp_sk(sk); + + mutex_lock(&xs->mutex); + if (xs->dev == dev) { + sk->sk_err = ENETDOWN; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_error_report(sk); + + xsk_unbind_dev(xs); + + /* Clear device references in umem. */ + xdp_umem_clear_dev(xs->umem); + } + mutex_unlock(&xs->mutex); + } + mutex_unlock(&net->xdp.lock); + break; + } + return NOTIFY_DONE; +} + static struct proto xsk_proto = { .name = "XDP", .owner = THIS_MODULE, @@ -764,6 +817,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sock_set_flag(sk, SOCK_RCU_FREE); xs = xdp_sk(sk); + xs->state = XSK_READY; mutex_init(&xs->mutex); spin_lock_init(&xs->tx_completion_lock); @@ -784,6 +838,10 @@ static const struct net_proto_family xsk_family_ops = { .owner = THIS_MODULE, }; +static struct notifier_block xsk_netdev_notifier = { + .notifier_call = xsk_notifier, +}; + static int __net_init xsk_net_init(struct net *net) { mutex_init(&net->xdp.lock); @@ -816,8 +874,15 @@ static int __init xsk_init(void) err = register_pernet_subsys(&xsk_net_ops); if (err) goto out_sk; + + err = register_netdevice_notifier(&xsk_netdev_notifier); + if (err) + goto out_pernet; + return 0; +out_pernet: + unregister_pernet_subsys(&xsk_net_ops); out_sk: sock_unregister(PF_XDP); out_proto: -- cgit v1.2.3