From a516993f0ac1694673412eb2d16a091eafa77d2a Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 13 Aug 2015 05:54:07 +0200 Subject: net: fix wrong skb_get() usage / crash in IGMP/MLD parsing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent refactoring of the IGMP and MLD parsing code into ipv6_mc_check_mld() / ip_mc_check_igmp() introduced a potential crash / BUG() invocation for bridges: I wrongly assumed that skb_get() could be used as a simple reference counter for an skb which is not the case. skb_get() bears additional semantics, a user count. This leads to a BUG() invocation in pskb_expand_head() / kernel panic if pskb_may_pull() is called on an skb with a user count greater than one - unfortunately the refactoring did just that. Fixing this by removing the skb_get() call and changing the API: The caller of ipv6_mc_check_mld() / ip_mc_check_igmp() now needs to additionally check whether the returned skb_trimmed is a clone. Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code") Reported-by: Brenden Blanco Signed-off-by: Linus Lüssing Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/ipv6/mcast_snoop.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c index df8afe5ab31e..9405b04eecc6 100644 --- a/net/ipv6/mcast_snoop.c +++ b/net/ipv6/mcast_snoop.c @@ -143,34 +143,36 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff *skb_chk = NULL; unsigned int transport_len; unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); - int ret; + int ret = -EINVAL; transport_len = ntohs(ipv6_hdr(skb)->payload_len); transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); - skb_get(skb); skb_chk = skb_checksum_trimmed(skb, transport_len, ipv6_mc_validate_checksum); if (!skb_chk) - return -EINVAL; + goto err; - if (!pskb_may_pull(skb_chk, len)) { - kfree_skb(skb_chk); - return -EINVAL; - } + if (!pskb_may_pull(skb_chk, len)) + goto err; ret = ipv6_mc_check_mld_msg(skb_chk); - if (ret) { - kfree_skb(skb_chk); - return ret; - } + if (ret) + goto err; if (skb_trimmed) *skb_trimmed = skb_chk; - else + /* free now unneeded clone */ + else if (skb_chk != skb) kfree_skb(skb_chk); - return 0; + ret = 0; + +err: + if (ret && skb_chk && skb_chk != skb) + kfree_skb(skb_chk); + + return ret; } /** @@ -179,7 +181,7 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) * * Checks whether an IPv6 packet is a valid MLD packet. If so sets - * skb network and transport headers accordingly and returns zero. + * skb transport header accordingly and returns zero. * * -EINVAL: A broken packet was detected, i.e. it violates some internet * standard @@ -194,7 +196,8 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb, * to leave the original skb and its full frame unchanged (which might be * desirable for layer 2 frame jugglers). * - * The caller needs to release a reference count from any returned skb_trimmed. + * Caller needs to set the skb network header and free any returned skb if it + * differs from the provided skb. */ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) { -- cgit v1.2.3 From ad706862890171e02df1d7391b05599fb676ec18 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 14 Aug 2015 11:05:52 -0700 Subject: ipv6: Remove un-used argument from ip6_dst_alloc() After 4b32b5ad31a6 ("ipv6: Stop rt6_info from using inet_peer's metrics"), ip6_dst_alloc() does not need the 'table' argument. This patch cleans it up. Signed-off-by: Martin KaFai Lau CC: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9de4d2bcd916..c95c3197c186 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -318,8 +318,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { /* allocate dst with ip6_dst_ops */ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags, - struct fib6_table *table) + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -336,10 +335,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, static struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, - int flags, - struct fib6_table *table) + int flags) { - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); if (rt) { rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); @@ -950,8 +948,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, - 0, ort->rt6i_table); + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0); if (!rt) return NULL; @@ -983,8 +980,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) struct rt6_info *pcpu_rt; pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), - rt->dst.dev, rt->dst.flags, - rt->rt6i_table); + rt->dst.dev, rt->dst.flags); if (!pcpu_rt) return NULL; @@ -1555,7 +1551,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(!idev)) return ERR_PTR(-ENODEV); - rt = ip6_dst_alloc(net, dev, 0, NULL); + rt = ip6_dst_alloc(net, dev, 0); if (unlikely(!rt)) { in6_dev_put(idev); dst = ERR_PTR(-ENOMEM); @@ -1742,7 +1738,8 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, + (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); if (!rt) { err = -ENOMEM; @@ -2399,7 +2396,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT, NULL); + DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From a73e4195636c17f310b8530643a576f42b82385f Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 14 Aug 2015 11:05:53 -0700 Subject: ipv6: Add rt6_make_pcpu_route() It is a prep work for fixing a potential deadlock when creating a pcpu rt. The current rt6_get_pcpu_route() will also create a pcpu rt if one does not exist. This patch moves the pcpu rt creation logic into another function, rt6_make_pcpu_route(). Signed-off-by: Martin KaFai Lau CC: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c95c3197c186..0a82653efc88 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -993,13 +993,21 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) /* It should be called with read_lock_bh(&tb6_lock) acquired */ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) { - struct rt6_info *pcpu_rt, *prev, **p; + struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt) - goto done; + if (pcpu_rt) { + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + } + return pcpu_rt; +} + +static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { @@ -1009,6 +1017,7 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) goto done; } + p = this_cpu_ptr(rt->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); if (prev) { /* If someone did it before us, return prev instead */ @@ -1093,8 +1102,11 @@ redo_rt6_select: rt->dst.lastuse = jiffies; rt->dst.__use++; pcpu_rt = rt6_get_pcpu_route(rt); - read_unlock_bh(&table->tb6_lock); + if (!pcpu_rt) + pcpu_rt = rt6_make_pcpu_route(rt); + + read_unlock_bh(&table->tb6_lock); return pcpu_rt; } } -- cgit v1.2.3 From 9c7370a166b4e157137bfbfe2ad296d57147547c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 14 Aug 2015 11:05:54 -0700 Subject: ipv6: Fix a potential deadlock when creating pcpu rt rt6_make_pcpu_route() is called under read_lock(&table->tb6_lock). rt6_make_pcpu_route() calls ip6_rt_pcpu_alloc(rt) which then calls dst_alloc(). dst_alloc() _may_ call ip6_dst_gc() which takes the write_lock(&tabl->tb6_lock). A visualized version: read_lock(&table->tb6_lock); rt6_make_pcpu_route(); => ip6_rt_pcpu_alloc(); => dst_alloc(); => ip6_dst_gc(); => write_lock(&table->tb6_lock); /* oops */ The fix is to do a read_unlock first before calling ip6_rt_pcpu_alloc(). A reported stack: [141625.537638] INFO: rcu_sched self-detected stall on CPU { 27} (t=60000 jiffies g=4159086 c=4159085 q=2139) [141625.547469] Task dump for CPU 27: [141625.550881] mtr R running task 0 22121 22081 0x00000008 [141625.558069] 0000000000000000 ffff88103f363d98 ffffffff8106e488 000000000000001b [141625.565641] ffffffff81684900 ffff88103f363db8 ffffffff810702b0 0000000008000000 [141625.573220] ffffffff81684900 ffff88103f363de8 ffffffff8108df9f ffff88103f375a00 [141625.580803] Call Trace: [141625.583345] [] sched_show_task+0xc1/0xc6 [141625.589650] [] dump_cpu_task+0x35/0x39 [141625.595144] [] rcu_dump_cpu_stacks+0x6a/0x8c [141625.601320] [] rcu_check_callbacks+0x1f6/0x5d4 [141625.607669] [] update_process_times+0x2a/0x4f [141625.613925] [] tick_sched_handle+0x32/0x3e [141625.619923] [] tick_sched_timer+0x35/0x5c [141625.625830] [] __hrtimer_run_queues+0x8f/0x18d [141625.632171] [] hrtimer_interrupt+0xa0/0x166 [141625.638258] [] local_apic_timer_interrupt+0x4e/0x52 [141625.645036] [] smp_apic_timer_interrupt+0x39/0x4a [141625.651643] [] apic_timer_interrupt+0x68/0x70 [141625.657895] [] ? dst_destroy+0x7c/0xb5 [141625.664188] [] ? fib6_flush_trees+0x20/0x20 [141625.670272] [] ? queue_write_lock_slowpath+0x60/0x6f [141625.677140] [] _raw_write_lock_bh+0x23/0x25 [141625.683218] [] __fib6_clean_all+0x40/0x82 [141625.689124] [] ? fib6_flush_trees+0x20/0x20 [141625.695207] [] fib6_clean_all+0xe/0x10 [141625.700854] [] fib6_run_gc+0x79/0xc8 [141625.706329] [] ip6_dst_gc+0x85/0xf9 [141625.711718] [] dst_alloc+0x55/0x159 [141625.717105] [] __ip6_dst_alloc.isra.32+0x19/0x63 [141625.723620] [] ip6_pol_route+0x36a/0x3e8 [141625.729441] [] ip6_pol_route_output+0x11/0x13 [141625.735700] [] fib6_rule_action+0xa7/0x1bf [141625.741698] [] ? ip6_pol_route_input+0x17/0x17 [141625.748043] [] fib_rules_lookup+0xb5/0x12a [141625.754050] [] ? poll_select_copy_remaining+0xf9/0xf9 [141625.761002] [] fib6_rule_lookup+0x37/0x5c [141625.766914] [] ? ip6_pol_route_input+0x17/0x17 [141625.773260] [] ip6_route_output+0x7a/0x82 [141625.779177] [] ip6_dst_lookup_tail+0x53/0x112 [141625.785437] [] ip6_dst_lookup_flow+0x2a/0x6b [141625.791604] [] rawv6_sendmsg+0x407/0x9b6 [141625.797423] [] ? do_ipv6_setsockopt.isra.8+0xd87/0xde2 [141625.804464] [] inet_sendmsg+0x57/0x8e [141625.810028] [] sock_sendmsg+0x2e/0x3c [141625.815588] [] SyS_sendto+0xfe/0x143 [141625.821063] [] ? rawv6_setsockopt+0x5e/0x67 [141625.827146] [] ? sock_common_setsockopt+0xf/0x11 [141625.833660] [] ? SyS_setsockopt+0x81/0xa2 [141625.839565] [] entry_SYSCALL_64_fastpath+0x12/0x6a Fixes: d52d3997f843 ("pv6: Create percpu rt6_info") Signed-off-by: Martin KaFai Lau CC: Hannes Frederic Sowa Reported-by: Steinar H. Gunderson Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 2 ++ net/ipv6/route.c | 44 +++++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 55d19861ab20..548c6237b1e7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -172,6 +172,8 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) *ppcpu_rt = NULL; } } + + non_pcpu_rt->rt6i_pcpu = NULL; } static void rt6_release(struct rt6_info *rt) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0a82653efc88..d15586490cec 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1007,27 +1007,39 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) { + struct fib6_table *table = rt->rt6i_table; struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { struct net *net = dev_net(rt->dst.dev); - pcpu_rt = net->ipv6.ip6_null_entry; - goto done; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return net->ipv6.ip6_null_entry; } - p = this_cpu_ptr(rt->rt6i_pcpu); - prev = cmpxchg(p, NULL, pcpu_rt); - if (prev) { - /* If someone did it before us, return prev instead */ + read_lock_bh(&table->tb6_lock); + if (rt->rt6i_pcpu) { + p = this_cpu_ptr(rt->rt6i_pcpu); + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + } else { + /* rt has been removed from the fib6 tree + * before we have a chance to acquire the read_lock. + * In this case, don't brother to create a pcpu rt + * since rt is going away anyway. The next + * dst_check() will trigger a re-lookup. + */ dst_destroy(&pcpu_rt->dst); - pcpu_rt = prev; + pcpu_rt = rt; } - -done: dst_hold(&pcpu_rt->dst); rt6_dst_from_metrics_check(pcpu_rt); + read_unlock_bh(&table->tb6_lock); return pcpu_rt; } @@ -1103,11 +1115,21 @@ redo_rt6_select: rt->dst.__use++; pcpu_rt = rt6_get_pcpu_route(rt); - if (!pcpu_rt) + if (pcpu_rt) { + read_unlock_bh(&table->tb6_lock); + } else { + /* We have to do the read_unlock first + * because rt6_make_pcpu_route() may trigger + * ip6_dst_gc() which will take the write_lock. + */ + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); pcpu_rt = rt6_make_pcpu_route(rt); + dst_release(&rt->dst); + } - read_unlock_bh(&table->tb6_lock); return pcpu_rt; + } } -- cgit v1.2.3