From 9979a55a833883242e3a29f3596676edd7199c46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Mar 2013 14:38:28 +0000 Subject: net: remove a WARN_ON() in net_enable_timestamp() The WARN_ON(in_interrupt()) in net_enable_timestamp() can get false positive, in socket clone path, run from softirq context : [ 3641.624425] WARNING: at net/core/dev.c:1532 net_enable_timestamp+0x7b/0x80() [ 3641.668811] Call Trace: [ 3641.671254] [] warn_slowpath_common+0x87/0xc0 [ 3641.677871] [] warn_slowpath_null+0x1a/0x20 [ 3641.683683] [] net_enable_timestamp+0x7b/0x80 [ 3641.689668] [] sk_clone_lock+0x425/0x450 [ 3641.695222] [] inet_csk_clone_lock+0x16/0x170 [ 3641.701213] [] tcp_create_openreq_child+0x29/0x820 [ 3641.707663] [] ? ipt_do_table+0x222/0x670 [ 3641.713354] [] tcp_v4_syn_recv_sock+0xab/0x3d0 [ 3641.719425] [] tcp_check_req+0x3da/0x530 [ 3641.724979] [] ? inet_hashinfo_init+0x60/0x80 [ 3641.730964] [] ? tcp_v4_rcv+0x79f/0xbe0 [ 3641.736430] [] tcp_v4_do_rcv+0x38d/0x4f0 [ 3641.741985] [] tcp_v4_rcv+0xa7a/0xbe0 Its safe at this point because the parent socket owns a reference on the netstamp_needed, so we cant have a 0 -> 1 transition, which requires to lock a mutex. Instead of refining the check, lets remove it, as all known callers are safe. If it ever changes in the future, static_key_slow_inc() will complain anyway. Reported-by: Laurent Chavey Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d540ced1f6c6..b13e5c766c11 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1545,7 +1545,6 @@ void net_enable_timestamp(void) return; } #endif - WARN_ON(in_interrupt()); static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); -- cgit v1.2.3 From 4a7df340ed1bac190c124c1601bfc10cde9fb4fb Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Mar 2013 19:14:07 +0000 Subject: 8021q: fix a potential use-after-free vlan_vid_del() could possibly free ->vlan_info after a RCU grace period, however, we may still refer to the freed memory area by 'grp' pointer. Found by code inspection. This patch moves vlan_vid_del() as behind as possible. Cc: Patrick McHardy Cc: "David S. Miller" Signed-off-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/8021q/vlan.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index a18714469bf7..85addcd9372b 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -86,13 +86,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) grp = &vlan_info->grp; - /* Take it out of our own structures, but be sure to interlock with - * HW accelerating devices or SW vlan input packet processing if - * VLAN is not 0 (leave it there for 802.1p). - */ - if (vlan_id) - vlan_vid_del(real_dev, vlan_id); - grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) @@ -114,6 +107,13 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) vlan_gvrp_uninit_applicant(real_dev); } + /* Take it out of our own structures, but be sure to interlock with + * HW accelerating devices or SW vlan input packet processing if + * VLAN is not 0 (leave it there for 802.1p). + */ + if (vlan_id) + vlan_vid_del(real_dev, vlan_id); + /* Get rid of the vlan's reference to real_dev */ dev_put(real_dev); } -- cgit v1.2.3 From 9b46922e15f4d9d2aedcd320c3b7f7f54d956da7 Mon Sep 17 00:00:00 2001 From: Hong zhi guo Date: Sat, 23 Mar 2013 02:27:50 +0000 Subject: bridge: fix crash when set mac address of br interface When I tried to set mac address of a bridge interface to a mac address which already learned on this bridge, I got system hang. The cause is straight forward: function br_fdb_change_mac_address calls fdb_insert with NULL source nbp. Then an fdb lookup is performed. If an fdb entry is found and it's local, it's OK. But if it's not local, source is dereferenced for printk without NULL check. Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index b0812c91c0f0..bab338e6270d 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -423,7 +423,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, return 0; br_warn(br, "adding interface %s with same address " "as a received packet\n", - source->dev->name); + source ? source->dev->name : br->dev->name); fdb_delete(br, fdb); } -- cgit v1.2.3 From 7ebe183c6d444ef5587d803b64a1f4734b18c564 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Sun, 24 Mar 2013 10:42:25 +0000 Subject: tcp: undo spurious timeout after SACK reneging On SACK reneging the sender immediately retransmits and forces a timeout but disables Eifel (undo). If the (buggy) receiver does not drop any packet this can trigger a false slow-start retransmit storm driven by the ACKs of the original packets. This can be detected with undo and TCP timestamps. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0d9bdacce99f..3bd55bad230a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2059,11 +2059,8 @@ void tcp_enter_loss(struct sock *sk, int how) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - if (!how) { - /* Push undo marker, if it was plain RTO and nothing - * was retransmitted. */ - tp->undo_marker = tp->snd_una; - } else { + tp->undo_marker = tp->snd_una; + if (how) { tp->sacked_out = 0; tp->fackets_out = 0; } -- cgit v1.2.3 From 1166fde6a923c30f4351515b6a9a1efc513e7d00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 25 Mar 2013 11:23:40 -0400 Subject: SUNRPC: Add barriers to ensure read ordering in rpc_wake_up_task_queue_locked We need to be careful when testing task->tk_waitqueue in rpc_wake_up_task_queue_locked, because it can be changed while we are holding the queue->lock. By adding appropriate memory barriers, we can ensure that it is safe to test task->tk_waitqueue for equality if the RPC_TASK_QUEUED bit is set. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org --- net/sunrpc/sched.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index fb20f25ddec9..f8529fc8e542 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -180,6 +180,8 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; queue->qlen++; + /* barrier matches the read in rpc_wake_up_task_queue_locked() */ + smp_wmb(); rpc_set_queued(task); dprintk("RPC: %5u added to queue %p \"%s\"\n", @@ -430,8 +432,11 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task */ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) - __rpc_do_wake_up_task(queue, task); + if (RPC_IS_QUEUED(task)) { + smp_rmb(); + if (task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); + } } /* -- cgit v1.2.3 From ded34e0fe8fe8c2d595bfa30626654e4b87621e0 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Mar 2013 03:18:33 +0000 Subject: unix: fix a race condition in unix_release() As reported by Jan, and others over the past few years, there is a race condition caused by unix_release setting the sock->sk pointer to NULL before properly marking the socket as dead/orphaned. This can cause a problem with the LSM hook security_unix_may_send() if there is another socket attempting to write to this partially released socket in between when sock->sk is set to NULL and it is marked as dead/orphaned. This patch fixes this by only setting sock->sk to NULL after the socket has been marked as dead; I also take the opportunity to make unix_release_sock() a void function as it only ever returned 0/success. Dave, I think this one should go on the -stable pile. Special thanks to Jan for coming up with a reproducer for this problem. Reported-by: Jan Stancek Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/unix/af_unix.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 51be64f163ec..f153a8d6e339 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -382,7 +382,7 @@ static void unix_sock_destructor(struct sock *sk) #endif } -static int unix_release_sock(struct sock *sk, int embrion) +static void unix_release_sock(struct sock *sk, int embrion) { struct unix_sock *u = unix_sk(sk); struct path path; @@ -451,8 +451,6 @@ static int unix_release_sock(struct sock *sk, int embrion) if (unix_tot_inflight) unix_gc(); /* Garbage collect fds */ - - return 0; } static void init_peercred(struct sock *sk) @@ -699,9 +697,10 @@ static int unix_release(struct socket *sock) if (!sk) return 0; + unix_release_sock(sk, 0); sock->sk = NULL; - return unix_release_sock(sk, 0); + return 0; } static int unix_autobind(struct socket *sock) -- cgit v1.2.3 From a79ca223e029aa4f09abb337accf1812c900a800 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Tue, 26 Mar 2013 01:52:45 +0800 Subject: ipv6: fix bad free of addrconf_init_net Signed-off-by: Hong Zhiguo Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f2c7e615f902..26512250e095 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4784,26 +4784,20 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev) static int __net_init addrconf_init_net(struct net *net) { - int err; + int err = -ENOMEM; struct ipv6_devconf *all, *dflt; - err = -ENOMEM; - all = &ipv6_devconf; - dflt = &ipv6_devconf_dflt; + all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); + if (all == NULL) + goto err_alloc_all; - if (!net_eq(net, &init_net)) { - all = kmemdup(all, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) - goto err_alloc_all; + dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); + if (dflt == NULL) + goto err_alloc_dflt; - dflt = kmemdup(dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) - goto err_alloc_dflt; - } else { - /* these will be inherited by all namespaces */ - dflt->autoconf = ipv6_defaults.autoconf; - dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; - } + /* these will be inherited by all namespaces */ + dflt->autoconf = ipv6_defaults.autoconf; + dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; -- cgit v1.2.3 From 14134f6584212d585b310ce95428014b653dfaf6 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Mon, 25 Mar 2013 17:02:04 +0000 Subject: af_unix: dont send SCM_CREDENTIAL when dest socket is NULL SCM_SCREDENTIALS should apply to write() syscalls only either source or destination socket asserted SOCK_PASSCRED. The original implememtation in maybe_add_creds is wrong, and breaks several LSB testcases ( i.e. /tset/LSB.os/netowkr/recvfrom/T.recvfrom). Origionally-authored-by: Karel Srot Signed-off-by: Ding Tianhong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f153a8d6e339..971282b6f6a3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1412,8 +1412,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, if (UNIXCB(skb).cred) return; if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + (other->sk_socket && + test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) { UNIXCB(skb).pid = get_pid(task_tgid(current)); UNIXCB(skb).cred = get_current_cred(); } -- cgit v1.2.3 From 330305cc4a6b0cb75c22fc01b8826f0ad755550f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 24 Mar 2013 17:36:29 +0000 Subject: ipv4: Fix ip-header identification for gso packets. ip-header id needs to be incremented even if IP_DF flag is set. This behaviour was changed in commit 490ab08127cebc25e3a26 (IP_GRE: Fix IP-Identification). Following patch fixes it so that identification is always incremented. Reported-by: Cong Wang Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/ipip.h | 16 ++++++---------- net/ipv4/af_inet.c | 3 +-- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/include/net/ipip.h b/include/net/ipip.h index fd19625ff99d..982141c15200 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -77,15 +77,11 @@ static inline void tunnel_ip_select_ident(struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); - if (iph->frag_off & htons(IP_DF)) - iph->id = 0; - else { - /* Use inner packet iph-id if possible. */ - if (skb->protocol == htons(ETH_P_IP) && old_iph->id) - iph->id = old_iph->id; - else - __ip_select_ident(iph, dst, - (skb_shinfo(skb)->gso_segs ?: 1) - 1); - } + /* Use inner packet iph-id if possible. */ + if (skb->protocol == htons(ETH_P_IP) && old_iph->id) + iph->id = old_iph->id; + else + __ip_select_ident(iph, dst, + (skb_shinfo(skb)->gso_segs ?: 1) - 1); } #endif diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 68f6a94f7661..c929d9c1c4b6 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1333,8 +1333,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->frag_off |= htons(IP_MF); offset += (skb->len - skb->mac_len - iph->ihl * 4); } else { - if (!(iph->frag_off & htons(IP_DF))) - iph->id = htons(id++); + iph->id = htons(id++); } iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; -- cgit v1.2.3