From 95224166a9032ff5d08fca633d37113078ce7d01 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jan 2020 09:32:46 +0100 Subject: vti[6]: fix packet tx through bpf_redirect() With an ebpf program that redirects packets through a vti[6] interface, the packets are dropped because no dst is attached. This could also be reproduced with an AF_PACKET socket, with the following python script (vti1 is an ip_vti interface): import socket send_s = socket.socket(socket.AF_PACKET, socket.SOCK_RAW, 0) # scapy # p = IP(src='10.100.0.2', dst='10.200.0.1')/ICMP(type='echo-request') # raw(p) req = b'E\x00\x00\x1c\x00\x01\x00\x00@\x01e\xb2\nd\x00\x02\n\xc8\x00\x01\x08\x00\xf7\xff\x00\x00\x00\x00' send_s.sendto(req, ('vti1', 0x800, 0, 0)) Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index e90b600c7a25..37cddd18f282 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -187,8 +187,17 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); } dst_hold(dst); -- cgit v1.2.3 From 4e4362d2bf2a49ff44dbbc9585207977ca3d71d0 Mon Sep 17 00:00:00 2001 From: Ulrich Weber Date: Wed, 15 Jan 2020 12:11:29 +0100 Subject: xfrm: support output_mark for offload ESP packets Commit 9b42c1f179a6 ("xfrm: Extend the output_mark") added output_mark support but missed ESP offload support. xfrm_smark_get() is not called within xfrm_input() for packets coming from esp4_gro_receive() or esp6_gro_receive(). Therefore call xfrm_smark_get() directly within these functions. Fixes: 9b42c1f179a6 ("xfrm: Extend the output_mark to support input direction and masking.") Signed-off-by: Ulrich Weber Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 2 ++ net/ipv6/esp6_offload.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 0e4a7cf6bc87..e2e219c7854a 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -57,6 +57,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, if (!x) goto out_reset; + skb->mark = xfrm_smark_get(skb->mark, x); + sp->xvec[sp->len++] = x; sp->olen++; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index e31626ffccd1..fd535053245b 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -79,6 +79,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, if (!x) goto out_reset; + skb->mark = xfrm_smark_get(skb->mark, x); + sp->xvec[sp->len++] = x; sp->olen++; -- cgit v1.2.3 From 5b2f1f3070b6447b76174ea8bfb7390dc6253ebd Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 20 Jan 2020 18:04:56 +0800 Subject: tcp_bbr: improve arithmetic division in bbr_update_bw() do_div() does a 64-by-32 division. Use div64_long() instead of it if the divisor is long, to avoid truncation to 32-bit. And as a nice side effect also cleans up the function a bit. Signed-off-by: Wen Yang Cc: Eric Dumazet Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index a6545ef0d27b..6c4d79baff26 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -779,8 +779,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. */ - bw = (u64)rs->delivered * BW_UNIT; - do_div(bw, rs->interval_us); + bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); /* If this sample is application-limited, it is likely to have a very * low delivered count that represents application behavior rather than -- cgit v1.2.3 From bfe02b9f9476bc8784ebb0f78fa244ad15bb15ea Mon Sep 17 00:00:00 2001 From: Theodore Dubois Date: Mon, 20 Jan 2020 14:10:53 -0800 Subject: tcp: remove redundant assigment to snd_cwnd Not sure how this got in here. git blame says the second assignment was added in 3a9a57f6, but that commit also removed the first assignment. Signed-off-by: Theodore Dubois Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d885ba868822..04273d6aa36b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2614,7 +2614,6 @@ int tcp_disconnect(struct sock *sk, int flags) WRITE_ONCE(tp->write_seq, seq); icsk->icsk_backoff = 0; - tp->snd_cwnd = 2; icsk->icsk_probes_out = 0; icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; -- cgit v1.2.3 From d0f418516022c32ecceaf4275423e5bd3f8743a9 Mon Sep 17 00:00:00 2001 From: William Dauchy Date: Tue, 21 Jan 2020 15:26:24 +0100 Subject: net, ip_tunnel: fix namespaces move in the same manner as commit 690afc165bb3 ("net: ip6_gre: fix moving ip6gre between namespaces"), fix namespace moving as it was broken since commit 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata."). Indeed, the ip6_gre commit removed the local flag for collect_md condition, so there is no reason to keep it for ip_gre/ip_tunnel. this patch will fix both ip_tunnel and ip_gre modules. Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: William Dauchy Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 0fe2a5d3e258..74e1d964a615 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1236,10 +1236,8 @@ int ip_tunnel_init(struct net_device *dev) iph->version = 4; iph->ihl = 5; - if (tunnel->collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (tunnel->collect_md) netif_keep_dst(dev); - } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); -- cgit v1.2.3 From d39ca2590d10712f412add7a88e1dd467a7246f4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 21 Jan 2020 16:50:49 +0100 Subject: Revert "udp: do rmem bulk free even if the rx sk queue is empty" This reverts commit 0d4a6608f68c7532dcbfec2ea1150c9761767d03. Williem reported that after commit 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") the memory allocated by an almost idle system with many UDP sockets can grow a lot. For stable kernel keep the solution as simple as possible and revert the offending commit. Reported-by: Willem de Bruijn Diagnosed-by: Eric Dumazet Fixes: 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") Signed-off-by: Paolo Abeni Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 93a355b6b092..030d43c7c957 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1368,7 +1368,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2)) + if (size < (sk->sk_rcvbuf >> 2) && + !skb_queue_empty(&up->reader_queue)) return; } else { size += up->forward_deficit; -- cgit v1.2.3 From bb48eb9b12a95db9d679025927269d4adda6dbd1 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Thu, 23 Jan 2020 13:20:18 +0100 Subject: fou: Fix IPv6 netlink policy When submitting v2 of "fou: Support binding FoU socket" (1713cb37bf67), I accidentally sent the wrong version of the patch and one fix was missing. In the initial version of the patch, as well as the version 2 that I submitted, I incorrectly used ".type" for the two V6-attributes. The correct is to use ".len". Reported-by: Dmitry Vyukov Fixes: 1713cb37bf67 ("fou: Support binding FoU socket") Signed-off-by: Kristian Evensen Signed-off-by: David S. Miller --- net/ipv4/fou.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 30fa771d382a..dcc79ff54b41 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -662,8 +662,8 @@ static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, - [FOU_ATTR_LOCAL_V6] = { .type = sizeof(struct in6_addr), }, - [FOU_ATTR_PEER_V6] = { .type = sizeof(struct in6_addr), }, + [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), }, [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, }; -- cgit v1.2.3 From 2bec445f9bf35e52e395b971df48d3e1e5dc704a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jan 2020 21:03:00 -0800 Subject: tcp: do not leave dangling pointers in tp->highest_sack Latest commit 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") apparently allowed syzbot to trigger various crashes in TCP stack [1] I believe this commit only made things easier for syzbot to find its way into triggering use-after-frees. But really the bugs could lead to bad TCP behavior or even plain crashes even for non malicious peers. I have audited all calls to tcp_rtx_queue_unlink() and tcp_rtx_queue_unlink_and_free() and made sure tp->highest_sack would be updated if we are removing from rtx queue the skb that tp->highest_sack points to. These updates were missing in three locations : 1) tcp_clean_rtx_queue() [This one seems quite serious, I have no idea why this was not caught earlier] 2) tcp_rtx_queue_purge() [Probably not a big deal for normal operations] 3) tcp_send_synack() [Probably not a big deal for normal operations] [1] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1864 [inline] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1856 [inline] BUG: KASAN: use-after-free in tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 Read of size 4 at addr ffff8880a488d068 by task ksoftirqd/1/16 CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:134 tcp_highest_sack_seq include/net/tcp.h:1864 [inline] tcp_highest_sack_seq include/net/tcp.h:1856 [inline] tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 tcp_try_undo_partial net/ipv4/tcp_input.c:2730 [inline] tcp_fastretrans_alert+0xf74/0x23f0 net/ipv4/tcp_input.c:2847 tcp_ack+0x2577/0x5bf0 net/ipv4/tcp_input.c:3710 tcp_rcv_established+0x6dd/0x1e90 net/ipv4/tcp_input.c:5706 tcp_v4_do_rcv+0x619/0x8d0 net/ipv4/tcp_ipv4.c:1619 tcp_v4_rcv+0x307f/0x3b40 net/ipv4/tcp_ipv4.c:2001 ip_protocol_deliver_rcu+0x5a/0x880 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x23b/0x380 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x1db/0x2f0 net/ipv4/ip_input.c:428 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:538 __netif_receive_skb_one_core+0x113/0x1a0 net/core/dev.c:5148 __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5262 process_backlog+0x206/0x750 net/core/dev.c:6093 napi_poll net/core/dev.c:6530 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6598 __do_softirq+0x262/0x98c kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:603 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:595 smpboot_thread_fn+0x6a3/0xa40 kernel/smpboot.c:165 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 Allocated by task 10091: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:513 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:521 slab_post_alloc_hook mm/slab.h:584 [inline] slab_alloc_node mm/slab.c:3263 [inline] kmem_cache_alloc_node+0x138/0x740 mm/slab.c:3575 __alloc_skb+0xd5/0x5e0 net/core/skbuff.c:198 alloc_skb_fclone include/linux/skbuff.h:1099 [inline] sk_stream_alloc_skb net/ipv4/tcp.c:875 [inline] sk_stream_alloc_skb+0x113/0xc90 net/ipv4/tcp.c:852 tcp_sendmsg_locked+0xcf9/0x3470 net/ipv4/tcp.c:1282 tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1432 inet_sendmsg+0x9e/0xe0 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 __sys_sendto+0x262/0x380 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto net/socket.c:2006 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:2006 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 10095: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:335 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 __cache_free mm/slab.c:3426 [inline] kmem_cache_free+0x86/0x320 mm/slab.c:3694 kfree_skbmem+0x178/0x1c0 net/core/skbuff.c:645 __kfree_skb+0x1e/0x30 net/core/skbuff.c:681 sk_eat_skb include/net/sock.h:2453 [inline] tcp_recvmsg+0x1252/0x2930 net/ipv4/tcp.c:2166 inet_recvmsg+0x136/0x610 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:886 [inline] sock_recvmsg net/socket.c:904 [inline] sock_recvmsg+0xce/0x110 net/socket.c:900 __sys_recvfrom+0x1ff/0x350 net/socket.c:2055 __do_sys_recvfrom net/socket.c:2073 [inline] __se_sys_recvfrom net/socket.c:2069 [inline] __x64_sys_recvfrom+0xe1/0x1a0 net/socket.c:2069 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8880a488d040 which belongs to the cache skbuff_fclone_cache of size 456 The buggy address is located 40 bytes inside of 456-byte region [ffff8880a488d040, ffff8880a488d208) The buggy address belongs to the page: page:ffffea0002922340 refcount:1 mapcount:0 mapping:ffff88821b057000 index:0x0 raw: 00fffe0000000200 ffffea00022a5788 ffffea0002624a48 ffff88821b057000 raw: 0000000000000000 ffff8880a488d040 0000000100000006 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8880a488cf00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880a488cf80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8880a488d000: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8880a488d080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a488d100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb Fixes: 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") Fixes: 50895b9de1d3 ("tcp: highest_sack fix") Fixes: 737ff314563c ("tcp: use sequence distance to detect reordering") Signed-off-by: Eric Dumazet Cc: Cambda Zhu Cc: Yuchung Cheng Cc: Neal Cardwell Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_output.c | 1 + 3 files changed, 3 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 04273d6aa36b..a7d766e6390e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2524,6 +2524,7 @@ static void tcp_rtx_queue_purge(struct sock *sk) { struct rb_node *p = rb_first(&sk->tcp_rtx_queue); + tcp_sk(sk)->highest_sack = NULL; while (p) { struct sk_buff *skb = rb_to_skb(p); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5347ab2c9c58..2a976f57f7e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3164,6 +3164,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->retransmit_skb_hint = NULL; if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; + tcp_highest_sack_replace(sk, skb, next); tcp_rtx_queue_unlink_and_free(skb, sk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 58c92a7d671c..b62b59b18db9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3232,6 +3232,7 @@ int tcp_send_synack(struct sock *sk) if (!nskb) return -ENOMEM; INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); + tcp_highest_sack_replace(sk, skb, nskb); tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); -- cgit v1.2.3 From a3ea86739f1bc7e121d921842f0f4a8ab1af94d9 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 23 Jan 2020 10:11:35 +0300 Subject: rt_cpu_seq_next should increase position index if seq_file .next fuction does not change position index, read after some lseek can generate unexpected output. https://bugzilla.kernel.org/show_bug.cgi?id=206283 Signed-off-by: Vasily Averin Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 87e979f2b74a..e356ea779227 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -271,6 +271,7 @@ static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) *pos = cpu+1; return &per_cpu(rt_cache_stat, cpu); } + (*pos)++; return NULL; } -- cgit v1.2.3