From 35db0691218959f05efbe26fd623ee67581ebff6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 24 Apr 2017 15:33:39 +0800 Subject: xfrm: do the garbage collection after flushing policy Now xfrm garbage collection can be triggered by 'ip xfrm policy del'. There is no reason not to do it after flushing policies, especially considering that 'garbage collection deferred' is only triggered when it reaches gc_thresh. It's no good that the policy is gone but the xdst is still held. Worse, xdst->route/orig_dst is also held and cannot be released even if the orig_dst is already expired. This patch is to do the garbage collection if there is any policy removed in xfrm_policy_flush. Signed-off-by: Xin Long Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 236cbbc0ab9c..dfc77b9c5e5a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1006,6 +1006,10 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + + if (cnt) + xfrm_garbage_collect(net); + return err; } EXPORT_SYMBOL(xfrm_policy_flush); -- cgit v1.2.3 From cfcf99f987ba321a3d122580716beb9b08d52eb8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 27 Apr 2017 12:03:37 +0200 Subject: xfrm: fix GRO for !CONFIG_NETFILTER In xfrm_input() when called from GRO, async == 0, and we end up skipping the processing in xfrm4_transport_finish(). GRO path will always skip the NF_HOOK, so we don't need the special-case for !NETFILTER during GRO processing. 
Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 46bdb4fbed0b..e23570b647ae 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -395,7 +395,7 @@ resume: if (xo) xfrm_gro = xo->flags & XFRM_GRO; - err = x->inner_mode->afinfo->transport_finish(skb, async); + err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); -- cgit v1.2.3 From 42b531de17d2f6bb9293f23398f6d9bb94635c3e Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Wed, 26 Apr 2017 10:05:00 +0200 Subject: tipc: Fix missing connection request handling In filter_connect, we use waitqueue_active() to check for any connections to wakeup. But waitqueue_active() is missing memory barriers while accessing the critical sections, leading to inconsistent results. In this commit, we replace this with an SMP safe wq_has_sleeper() using the generic socket callback sk_data_ready(). Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 566906795c8c..3b8df510a80c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1581,8 +1581,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + sk->sk_data_ready(sk); /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); -- cgit v1.2.3 From 4e0df4951e9ecb7ec026a1330ed59f12e8937a95 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Wed, 26 Apr 2017 10:05:01 +0200 Subject: tipc: improve error validations for sockets in CONNECTING state Until now, the checks for sockets in CONNECTING state were based on the assumption that the incoming message was always from the peer's accepted data socket. However, when an application using a non-blocking socket sends an implicit connect, this socket, which is in CONNECTING state, can receive error messages from the peer's listening socket. As we discard these messages, the application socket hangs due to inactivity. In addition to this, there are other places where we process errors but do not notify the user. In this commit, we process such incoming error messages and notify our users about them using sk_state_change(). Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/socket.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b8df510a80c..38c367f6ced4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1259,7 +1259,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) struct sock *sk = sock->sk; DEFINE_WAIT(wait); long timeo = *timeop; - int err; + int err = sock_error(sk); + + if (err) + return err; for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -1281,6 +1284,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = sock_intr_errno(timeo); if (signal_pending(current)) break; + + err = sock_error(sk); + if (err) + break; } finish_wait(sk_sleep(sk), &wait); *timeop = timeo; @@ -1551,6 +1558,8 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); + u32 pport = msg_origport(hdr); + u32 pnode = msg_orignode(hdr); if (unlikely(msg_mcast(hdr))) return false; @@ -1558,18 +1567,28 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) switch (sk->sk_state) { case TIPC_CONNECTING: /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(hdr))) - return false; + if (unlikely(!msg_connected(hdr))) { + if (pport != tsk_peer_port(tsk) || + pnode != tsk_peer_node(tsk)) + return false; + + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } if (unlikely(msg_errcode(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); return true; } if (unlikely(!msg_isdata(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; + sk->sk_state_change(sk); return true; } -- cgit v1.2.3 From c1be7756284b0fdbfe8aea8da968ce054697e0c5 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan 
Date: Wed, 26 Apr 2017 10:05:02 +0200 Subject: tipc: close the connection if protocol messages contain errors When a socket is shutting down, we notify the peer node about the connection termination by reusing an incoming message if possible. If the last received message was a connection acknowledgment message, we reverse this message and set the error code to TIPC_ERR_NO_PORT and send it to peer. In tipc_sk_proto_rcv(), we never check for message errors while processing the connection acknowledgment or probe messages. Thus this message performs the usual flow control accounting and leaves the session hanging. In this commit, we terminate the connection when we receive such error messages. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 38c367f6ced4..bdce99f9407a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -866,6 +866,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + goto exit; + } + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { -- cgit v1.2.3 From 9b83e0319840eca758ef586776a427284ff767bf Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 28 Apr 2017 10:54:32 +0200 Subject: ipv4: Don't pass IP fragments to upper layer GRO handlers. Upper layer GRO handlers can not handle IP fragments, so exit GRO processing in this case. This fixes ESP GRO because the packet must be reassembled before we can decapsulate, otherwise we get authentication failures. It also aligns IPv4 to IPv6 where packets with fragmentation headers are not passed to upper layer GRO handlers. 
Fixes: 7785bba299a8 ("esp: Add a software GRO codepath") Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b1fc6e4278e..13a9a3297eae 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1343,6 +1343,9 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (*(u8 *)iph != 0x45) goto out_unlock; + if (ip_is_fragment(iph)) + goto out_unlock; + if (unlikely(ip_fast_csum((u8 *)iph, 5))) goto out_unlock; -- cgit v1.2.3 From 7162fb242cb8322beb558828fd26b33c3e9fc805 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Apr 2017 17:15:40 -0700 Subject: tcp: do not underestimate skb->truesize in tcp_trim_head() Andrey found a way to trigger the WARN_ON_ONCE(delta < len) in skb_try_coalesce() using syzkaller and a filter attached to a TCP socket over the loopback interface. I believe one issue with looped skbs is that tcp_trim_head() can end up producing an skb with an underestimated truesize. It hardly matters for normal conditions, since packets sent over loopback are never truncated. Bytes trimmed from skb->head should not change skb truesize, since skb->head is not reallocated. Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c3c082ed3879..a85d863c4419 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1267,7 +1267,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, * eventually). The difference is that pulled data not copied, but * immediately discarded. 
*/ -static void __pskb_trim_head(struct sk_buff *skb, int len) +static int __pskb_trim_head(struct sk_buff *skb, int len) { struct skb_shared_info *shinfo; int i, k, eat; @@ -1277,7 +1277,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) __skb_pull(skb, eat); len -= eat; if (!len) - return; + return 0; } eat = len; k = 0; @@ -1303,23 +1303,28 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) skb_reset_tail_pointer(skb); skb->data_len -= len; skb->len = skb->data_len; + return len; } /* Remove acked data from a packet in the transmit queue. */ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { + u32 delta_truesize; + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; - __pskb_trim_head(skb, len); + delta_truesize = __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; - skb->truesize -= len; - sk->sk_wmem_queued -= len; - sk_mem_uncharge(sk, len); - sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + if (delta_truesize) { + skb->truesize -= delta_truesize; + sk->sk_wmem_queued -= delta_truesize; + sk_mem_uncharge(sk, delta_truesize); + sock_set_flag(sk, SOCK_QUEUE_SHRUNK); + } /* Any change of skb->len requires recalculation of tso factor. */ if (tcp_skb_pcount(skb) > 1) -- cgit v1.2.3 From c21b48cc1bbf2f5af3ef54ada559f7fadf8b508b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Apr 2017 09:07:46 -0700 Subject: net: adjust skb->truesize in ___pskb_trim() Andrey found a way to trigger the WARN_ON_ONCE(delta < len) in skb_try_coalesce() using syzkaller and a filter attached to a TCP socket. As we did recently in commit 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") we can adjust skb->truesize from ___pskb_trim(), via a call to skb_condense(). If all frags were freed, then skb->truesize can be recomputed. This call can be done if skb is not yet owned, or destructor is sock_edemux(). 
Signed-off-by: Eric Dumazet Reported-by: Andrey Konovalov Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f86bf69cfb8d..f1d04592ace0 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1576,6 +1576,8 @@ done: skb_set_tail_pointer(skb, len); } + if (!skb->sk || skb->destructor == sock_edemux) + skb_condense(skb); return 0; } EXPORT_SYMBOL(___pskb_trim); -- cgit v1.2.3