From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 6 Mar 2019 14:35:15 -0500 Subject: bpf: only test gso type on gso packets BPF can adjust gso only for tcp bytestreams. Fail on other gso types. But only on gso packets. It does not touch this field if !gso_size. Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols") Signed-off-by: Willem de Bruijn Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 5ceba98069d4..f274620945ff 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) u32 off = skb_mac_header_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); @@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_cow(skb, len_diff); @@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff) u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb); int ret; - if (!skb_is_gso_tcp(skb)) + if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) return -ENOTSUPP; ret = skb_unclone(skb, GFP_ATOMIC); -- cgit v1.2.3 From ea0371f7879987cff70e21d808e3e9fea624c051 Mon Sep 17 00:00:00 2001 From: Peter Oskolkov Date: Mon, 4 Mar 2019 16:27:08 -0800 Subject: net: fix GSO in bpf_lwt_push_ip_encap GSO needs inner headers and inner protocol set properly to work. skb->inner_mac_header: skb_reset_inner_headers() assigns the current mac header value to inner_mac_header; but it is not set at the point, so we need to call skb_reset_inner_mac_header, otherwise gre_gso_segment fails: it does int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); ... if (unlikely(!pskb_may_pull(skb, tnl_hlen))) ... skb->inner_protocol should also be correctly set. Fixes: ca78801a81e0 ("bpf: handle GSO in bpf_lwt_push_encap") Signed-off-by: Peter Oskolkov Reviewed-by: David Ahern Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- net/core/lwt_bpf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core') diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index cf2f8897ca19..126d31ff5ee3 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -625,6 +625,8 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) /* push the encap headers and fix pointers */ skb_reset_inner_headers(skb); + skb_reset_inner_mac_header(skb); /* mac header is not yet set */ + skb_set_inner_protocol(skb, skb->protocol); skb->encapsulation = 1; skb_push(skb, len); if (ingress) -- cgit v1.2.3 From e8e3437762ad938880dd48a3c52d702e7cf3c124 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 7 Mar 2019 11:35:43 +0100 Subject: bpf: Stop the psock parser before canceling its work We might have never enabled (started) the psock's parser, in which case it will not get stopped when destroying the psock. This leads to a warning when trying to cancel parser's work from psock's deferred destructor: [ 405.325769] WARNING: CPU: 1 PID: 3216 at net/strparser/strparser.c:526 strp_done+0x3c/0x40 [ 405.326712] Modules linked in: [last unloaded: test_bpf] [ 405.327359] CPU: 1 PID: 3216 Comm: kworker/1:164 Tainted: G W 5.0.0 #42 [ 405.328294] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fedoraproject.org-1.fc28 04/01/2014 [ 405.329712] Workqueue: events sk_psock_destroy_deferred [ 405.330254] RIP: 0010:strp_done+0x3c/0x40 [ 405.330706] Code: 28 e8 b8 d5 6b ff 48 8d bb 80 00 00 00 e8 9c d5 6b ff 48 8b 7b 18 48 85 ff 74 0d e8 1e a5 e8 ff 48 c7 43 18 00 00 00 00 5b c3 <0f> 0b eb cf 66 66 66 66 90 55 89 f5 53 48 89 fb 48 83 c7 28 e8 0b [ 405.332862] RSP: 0018:ffffc900026bbe50 EFLAGS: 00010246 [ 405.333482] RAX: ffffffff819323e0 RBX: ffff88812cb83640 RCX: ffff88812cb829e8 [ 405.334228] RDX: 0000000000000001 RSI: ffff88812cb837e8 RDI: ffff88812cb83640 [ 405.335366] RBP: ffff88813fd22680 R08: 0000000000000000 R09: 000073746e657665 [ 405.336472] R10: 8080808080808080 R11: 0000000000000001 R12: ffff88812cb83600 [ 405.337760] R13: 0000000000000000 R14: ffff88811f401780 R15: ffff88812cb837e8 [ 405.338777] FS: 0000000000000000(0000) GS:ffff88813fd00000(0000) knlGS:0000000000000000 [ 405.339903] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 405.340821] CR2: 00007fb11489a6b8 CR3: 000000012d4d6000 CR4: 00000000000406e0 [ 405.341981] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 405.343131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 405.344415] Call Trace: [ 405.344821] sk_psock_destroy_deferred+0x23/0x1b0 [ 405.345585] process_one_work+0x1ae/0x3e0 [ 405.346110] worker_thread+0x3c/0x3b0 [ 405.346576] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 405.347187] kthread+0x11d/0x140 [ 405.347601] ? __kthread_parkme+0x80/0x80 [ 405.348108] ret_from_fork+0x35/0x40 [ 405.348566] ---[ end trace a4a3af4026a327d4 ]--- Stop psock's parser just before canceling its work. Fixes: 1d79895aef18 ("sk_msg: Always cancel strp work before freeing the psock") Reported-by: kernel test robot Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann --- net/core/skmsg.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/skmsg.c b/net/core/skmsg.c index ae6f06e45737..cc94d921476c 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -554,6 +554,7 @@ static void sk_psock_destroy_deferred(struct work_struct *gc) struct sk_psock *psock = container_of(gc, struct sk_psock, gc); /* No sk_callback_lock since already detached. */ + strp_stop(&psock->parser.strp); strp_done(&psock->parser.strp); cancel_work_sync(&psock->work); -- cgit v1.2.3 From 3499e87ea0413ee5b2cc028f4c8ed4d424bc7f98 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 16:58:35 +0100 Subject: ethtool: reduce stack usage with clang clang inlines the dev_ethtool() more aggressively than gcc does, leading to a larger amount of used stack space: net/core/ethtool.c:2536:24: error: stack frame size of 1216 bytes in function 'dev_ethtool' [-Werror,-Wframe-larger-than=] Marking the sub-functions that require the most stack space as noinline_for_stack gives us reasonable behavior on all compilers. Signed-off-by: Arnd Bergmann Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- net/core/ethtool.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index d4918ffddda8..b1eb32419732 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2319,9 +2319,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int ret; @@ -2349,9 +2350,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } -static int ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int i, ret = 0; @@ -2405,7 +2407,7 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, +static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; -- cgit v1.2.3 From 2a5ff07a0eb945f291e361aa6f6becca8340ba46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 10 Mar 2019 10:39:37 -0700 Subject: gro_cells: make sure device is up in gro_cells_receive() We keep receiving syzbot reports [1] that show that tunnels do not play the rcu/IFF_UP rules properly. At device dismantle phase, gro_cells_destroy() will be called only after a full rcu grace period is observed after IFF_UP has been cleared. This means that IFF_UP needs to be tested before queueing packets into netif_rx() or gro_cells. This patch implements the test in gro_cells_receive() because too many callers do not seem to bother enough. [1] BUG: unable to handle kernel paging request at fffff4ca0b9ffffe PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP KASAN CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: netns cleanup_net RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline] RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline] RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline] RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline] RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78 Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03 RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02 RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0 RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645 R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000 R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75 kobject: 'loop2' (000000004bd7d84a): kobject_uevent_env FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0 Call Trace: kobject: 'loop2' (000000004bd7d84a): fill_kobj_path: path = '/devices/virtual/block/loop2' ip_tunnel_dev_free+0x19/0x60 net/ipv4/ip_tunnel.c:1010 netdev_run_todo+0x51c/0x7d0 net/core/dev.c:8970 rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:116 ip_tunnel_delete_nets+0x423/0x5f0 net/ipv4/ip_tunnel.c:1124 vti_exit_batch_net+0x23/0x30 net/ipv4/ip_vti.c:495 ops_exit_list.isra.0+0x105/0x160 net/core/net_namespace.c:156 cleanup_net+0x3fb/0x960 net/core/net_namespace.c:551 process_one_work+0x98e/0x1790 kernel/workqueue.c:2173 worker_thread+0x98/0xe40 kernel/workqueue.c:2319 kthread+0x357/0x430 kernel/kthread.c:246 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352 Modules linked in: CR2: fffff4ca0b9ffffe [ end trace 513fc9c1338d1cb3 ] RIP: 0010:__skb_unlink include/linux/skbuff.h:1929 [inline] RIP: 0010:__skb_dequeue include/linux/skbuff.h:1945 [inline] RIP: 0010:__skb_queue_purge include/linux/skbuff.h:2656 [inline] RIP: 0010:gro_cells_destroy net/core/gro_cells.c:89 [inline] RIP: 0010:gro_cells_destroy+0x19d/0x360 net/core/gro_cells.c:78 Code: 03 42 80 3c 20 00 0f 85 53 01 00 00 48 8d 7a 08 49 8b 47 08 49 c7 07 00 00 00 00 48 89 f9 49 c7 47 08 00 00 00 00 48 c1 e9 03 <42> 80 3c 21 00 0f 85 10 01 00 00 48 89 c1 48 89 42 08 48 c1 e9 03 RSP: 0018:ffff8880aa3f79a8 EFLAGS: 00010a02 RAX: 00ffffffffffffe8 RBX: ffffe8ffffc64b70 RCX: 1ffff8ca0b9ffffe RDX: ffffc6505cffffe8 RSI: ffffffff858410ca RDI: ffffc6505cfffff0 RBP: ffff8880aa3f7a08 R08: ffff8880aa3e8580 R09: fffffbfff1263645 R10: fffffbfff1263644 R11: ffffffff8931b223 R12: dffffc0000000000 kobject: 'loop3' (00000000e4ee57a6): kobject_uevent_env R13: 0000000000000000 R14: ffffe8ffffc64b80 R15: ffffe8ffffc64b75 FS: 0000000000000000(0000) GS:ffff8880ae800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffff4ca0b9ffffe CR3: 0000000094941000 CR4: 00000000001406f0 Fixes: c9e6bc644e55 ("net: add gro_cells infrastructure") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/core/gro_cells.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index acf45ddbe924..e095fb871d91 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -13,22 +13,36 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct gro_cell *cell; + int res; - if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) - return netif_rx(skb); + rcu_read_lock(); + if (unlikely(!(dev->flags & IFF_UP))) + goto drop; + + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) { + res = netif_rx(skb); + goto unlock; + } cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { +drop: atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); - return NET_RX_DROP; + res = NET_RX_DROP; + goto unlock; } __skb_queue_tail(&cell->napi_skbs, skb); if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - return NET_RX_SUCCESS; + + res = NET_RX_SUCCESS; + +unlock: + rcu_read_unlock(); + return res; } EXPORT_SYMBOL(gro_cells_receive); -- cgit v1.2.3