From fedbb6b4ff341c1e2120f4ffbf367fd78ac3e8f3 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 29 Jun 2016 21:47:03 +0300 Subject: ipv4: Fix ip_skb_dst_mtu to use the sk passed by ip_finish_output ip_skb_dst_mtu uses skb->sk, assuming it is an AF_INET socket (e.g. it calls ip_sk_use_pmtu which casts sk as an inet_sk). However, in the case of UDP tunneling, the skb->sk is not necessarily an inet socket (could be AF_PACKET socket, or AF_UNSPEC if arriving from tun/tap). OTOH, the sk passed as an argument throughout IP stack's output path is the one which is of PMTU interest: - In case of local sockets, sk is same as skb->sk; - In case of a udp tunnel, sk is the tunneling socket. Fix, by passing ip_finish_output's sk to ip_skb_dst_mtu. This augments 7026b1ddb6 'netfilter: Pass socket pointer down through okfn().' Signed-off-by: Shmulik Ladkani Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/bridge/br_netfilter_hooks.c | 2 +- net/ipv4/ip_output.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2d25979273a6..77e7f69bf80d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -700,7 +700,7 @@ static int br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { - unsigned int mtu = ip_skb_dst_mtu(skb); + unsigned int mtu = ip_skb_dst_mtu(sk, skb); struct iphdr *iph = ip_hdr(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 124bf0a66328..4bd4921639c3 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk return dst_output(net, sk, skb); } #endif - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); @@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) mtu = IPCB(skb)->frag_max_size; -- cgit v1.2.3 From eb70db8756717b90c01ccc765fdefc4dd969fc74 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 1 Jul 2016 16:07:50 -0400 Subject: packet: Use symmetric hash for PACKET_FANOUT_HASH. People who use PACKET_FANOUT_HASH want a symmetric hash, meaning that they want packets going in both directions on a flow to hash to the same bucket. The core kernel SKB hash became non-symmetric when the ipv6 flow label and other entities were incorporated into the standard flow hash order to increase entropy. But there are no users of PACKET_FANOUT_HASH who want an assymetric hash, they all want a symmetric one. Therefore, use the flow dissector to compute a flat symmetric hash over only the protocol, addresses and ports. This hash does not get installed into and override the normal skb hash, so this change has no effect whatsoever on the rest of the stack. Reported-by: Eric Leblond Tested-by: Eric Leblond Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + net/core/flow_dissector.c | 43 +++++++++++++++++++++++++++++++++++++++++++ net/packet/af_packet.c | 2 +- 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ee38a4127475..24859d40acde 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1062,6 +1062,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a669dea146c6..61ad43f61c5e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; + +u32 __skb_get_hash_symmetric(struct sk_buff *skb) +{ + struct flow_keys keys; + + __flow_hash_secret_init(); + + memset(&keys, 0, sizeof(keys)); + __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, + NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + return __flow_hash_from_keys(&keys, hashrnd); +} +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, }; +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_dissector_symmetric, + flow_keys_dissector_symmetric_keys, + ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_buf_dissector, flow_keys_buf_dissector_keys, ARRAY_SIZE(flow_keys_buf_dissector_keys)); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9bff6ef16fa7..9f0983fa4d52 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb_get_hash(skb), num); + return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, -- cgit v1.2.3 From 82a31b9231f02d9c1b7b290a46999d517b0d312a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 30 Jun 2016 10:15:22 -0700 Subject: net_sched: fix mirrored packets checksum Similar to commit 9b368814b336 ("net: fix bridge multicast packet checksum validation") we need to fixup the checksum for CHECKSUM_COMPLETE when pushing skb on RX path. Otherwise we get similar splats. Cc: Jamal Hadi Salim Cc: Tom Herbert Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/linux/skbuff.h | 19 +++++++++++++++++++ net/core/skbuff.c | 18 ------------------ net/sched/act_mirred.c | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 24859d40acde..f39b37180c41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2870,6 +2870,25 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb, skb->csum = csum_partial(start, len, skb->csum); } +/** + * skb_push_rcsum - push skb and update receive checksum + * @skb: buffer to update + * @len: length of data pulled + * + * This function performs an skb_push on the packet and updates + * the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_push unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. + */ +static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, + unsigned int len) +{ + skb_push(skb, len); + skb_postpush_rcsum(skb, skb->data, len); + return skb->data; +} + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f2b77e549c03..eb12d2161fb2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3015,24 +3015,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, } EXPORT_SYMBOL_GPL(skb_append_pagefrags); -/** - * skb_push_rcsum - push skb and update receive checksum - * @skb: buffer to update - * @len: length of data pulled - * - * This function performs an skb_push on the packet and updates - * the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_push unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. - */ -static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) -{ - skb_push(skb, len); - skb_postpush_rcsum(skb, skb->data, len); - return skb->data; -} - /** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 128942bc9e42..1f5bd6ccbd2c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) - skb_push(skb2, skb->mac_len); + skb_push_rcsum(skb2, skb->mac_len); } /* mirror is always swallowed */ -- cgit v1.2.3 From 55e77a3e8297581c919b45adcc4d0815b69afa84 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Fri, 1 Jul 2016 11:11:21 +0200 Subject: tipc: fix nl compat regression for link statistics Fix incorrect use of nla_strlcpy() where the first NLA_HDRLEN bytes of the link name where left out. Making the output of tipc-config -ls look something like: Link statistics: dcast-link 1:data0-1.1.2:data0 1:data0-1.1.3:data0 Also, for the record, the patch that introduce this regression claims "Sending the whole object out can cause a leak". Which isn't very likely as this is a compat layer, where the data we are parsing is generated by us and we know the string to be NULL terminated. But you can of course never be to secure. Fixes: 5d2be1422e02 (tipc: fix an infoleak in tipc_nl_compat_link_dump) Signed-off-by: Richard Alpe Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 3ad9fab1985f..1fd464764765 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, -- cgit v1.2.3 From 3dad5424adfb346c871847d467f97dcdca64ea97 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Sun, 3 Jul 2016 10:54:54 +0200 Subject: RDS: fix rds_tcp_init() error path If register_pernet_subsys() fails, we shouldn't try to call unregister_pernet_subsys(). Fixes: 467fa15356 ("RDS-TCP: Support multiple RDS-TCP listen endpoints, one per netns.") Cc: stable@vger.kernel.org Cc: Sowmini Varadhan Cc: David S. Miller Signed-off-by: Vegard Nossum Acked-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 74ee126a6fe6..c8a7b4c90190 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -616,7 +616,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -628,8 +628,9 @@ static int rds_tcp_init(void) out_recv: rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; -- cgit v1.2.3 From ab58298cf459fcd4f588a401d36abf0bd2215b51 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Tue, 5 Jul 2016 21:12:53 +0200 Subject: net: fix decnet rtnexthop parsing dn_fib_count_nhs() could enter an infinite loop if nhp->rtnh_len == 0 (i.e. if userspace passes a malformed netlink message). Let's use the helpers from net/nexthop.h which take care of all this stuff. We can do exactly the same as e.g. fib_count_nexthops() and fib_get_nhs() from net/ipv4/fib_semantics.c. This fixes the softlockup for me. Cc: Thomas Graf Signed-off-by: Vegard Nossum Signed-off-by: David S. Miller --- net/decnet/dn_fib.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index df4803437888..a796fc7cbc35 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -41,6 +41,7 @@ #include #include #include +#include #define RT_MIN_TABLE 1 @@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr) struct rtnexthop *nhp = nla_data(attr); int nhs = 0, nhlen = nla_len(attr); - while(nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(nhp, nhlen)) { nhs++; - nhp = RTNH_NEXT(nhp); + nhp = rtnh_next(nhp, &nhlen); } - return nhs; + /* leftover implies invalid nexthop configuration, discard it */ + return nhlen > 0 ? 0 : nhs; } static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, @@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, int nhlen = nla_len(attr); change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(nhp, nhlen)) return -EINVAL; nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; nh->nh_oif = nhp->rtnh_ifindex; nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { + attrlen = rtnh_attrlen(nhp); + if (attrlen > 0) { struct nlattr *gw_attr; gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } - nhp = RTNH_NEXT(nhp); + + nhp = rtnh_next(nhp, &nhlen); } endfor_nexthops(fi); return 0; -- cgit v1.2.3 From 903ce4abdf374e3365d93bcb3df56c62008835ba Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 5 Jul 2016 12:10:23 -0700 Subject: ipv6: Fix mem leak in rt6i_pcpu It was first reported and reproduced by Petr (thanks!) in https://bugzilla.kernel.org/show_bug.cgi?id=119581 free_percpu(rt->rt6i_pcpu) used to always happen in ip6_dst_destroy(). However, after fixing a deadlock bug in commit 9c7370a166b4 ("ipv6: Fix a potential deadlock when creating pcpu rt"), free_percpu() is not called before setting non_pcpu_rt->rt6i_pcpu to NULL. It is worth to note that rt6i_pcpu is protected by table->tb6_lock. kmemleak somehow did not report it. We nailed it down by observing the pcpu entries in /proc/vmallocinfo (first suggested by Hannes, thanks!). Signed-off-by: Martin KaFai Lau Fixes: 9c7370a166b4 ("ipv6: Fix a potential deadlock when creating pcpu rt") Reported-by: Petr Novopashenniy Tested-by: Petr Novopashenniy Acked-by: Hannes Frederic Sowa Cc: Hannes Frederic Sowa Cc: Petr Novopashenniy Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bcef2369d64..771be1fa4176 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } -- cgit v1.2.3