From cc9b17ad29ecaa20bfe426a8d4dbfb94b13ff1cc Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 30 May 2012 21:18:10 +0000 Subject: net: sock: validate data_len before allocating skb in sock_alloc_send_pskb() We need to validate the number of pages consumed by data_len, otherwise frags array could be overflowed by userspace. So this patch validate data_len and return -EMSGSIZE when data_len may occupies more frags than MAX_SKB_FRAGS. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- net/core/sock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 653f8c0aedc5..9e5b71fda6ec 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1592,6 +1592,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, gfp_t gfp_mask; long timeo; int err; + int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + err = -EMSGSIZE; + if (npages > MAX_SKB_FRAGS) + goto failure; gfp_mask = sk->sk_allocation; if (gfp_mask & __GFP_WAIT) @@ -1610,14 +1615,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { skb = alloc_skb(header_len, gfp_mask); if (skb) { - int npages; int i; /* No pages, we're done... */ if (!data_len) break; - npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; skb->truesize += data_len; skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { -- cgit v1.2.3 From 7433819a1eefd4e74711fffd6d54e30a644ef240 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 31 May 2012 21:00:26 +0000 Subject: tcp: do not create inetpeer on SYNACK message Another problem on SYNFLOOD/DDOS attack is the inetpeer cache getting larger and larger, using lots of memory and cpu time. tcp_v4_send_synack() ->inet_csk_route_req() ->ip_route_output_flow() ->rt_set_nexthop() ->rt_init_metrics() ->inet_getpeer( create = true) This is a side effect of commit a4daad6b09230 (net: Pre-COW metrics for TCP) added in 2.6.39 Possible solution : Instruct inet_csk_route_req() to remove FLOWI_FLAG_PRECOW_METRICS Before patch : # grep peer /proc/slabinfo inet_peer_cache 4175430 4175430 192 42 2 : tunables 0 0 0 : slabdata 99415 99415 0 Samples: 41K of event 'cycles', Event count (approx.): 30716565122 + 20,24% ksoftirqd/0 [kernel.kallsyms] [k] inet_getpeer + 8,19% ksoftirqd/0 [kernel.kallsyms] [k] peer_avl_rebalance.isra.1 + 4,81% ksoftirqd/0 [kernel.kallsyms] [k] sha_transform + 3,64% ksoftirqd/0 [kernel.kallsyms] [k] fib_table_lookup + 2,36% ksoftirqd/0 [ixgbe] [k] ixgbe_poll + 2,16% ksoftirqd/0 [kernel.kallsyms] [k] __ip_route_output_key + 2,11% ksoftirqd/0 [kernel.kallsyms] [k] kernel_map_pages + 2,11% ksoftirqd/0 [kernel.kallsyms] [k] ip_route_input_common + 2,01% ksoftirqd/0 [kernel.kallsyms] [k] __inet_lookup_established + 1,83% ksoftirqd/0 [kernel.kallsyms] [k] md5_transform + 1,75% ksoftirqd/0 [kernel.kallsyms] [k] check_leaf.isra.9 + 1,49% ksoftirqd/0 [kernel.kallsyms] [k] ipt_do_table + 1,46% ksoftirqd/0 [kernel.kallsyms] [k] hrtimer_interrupt + 1,45% ksoftirqd/0 [kernel.kallsyms] [k] kmem_cache_alloc + 1,29% ksoftirqd/0 [kernel.kallsyms] [k] inet_csk_search_req + 1,29% ksoftirqd/0 [kernel.kallsyms] [k] __netif_receive_skb + 1,16% ksoftirqd/0 [kernel.kallsyms] [k] copy_user_generic_string + 1,15% ksoftirqd/0 [kernel.kallsyms] [k] kmem_cache_free + 1,02% ksoftirqd/0 [kernel.kallsyms] [k] tcp_make_synack + 0,93% ksoftirqd/0 [kernel.kallsyms] [k] _raw_spin_lock_bh + 0,87% ksoftirqd/0 [kernel.kallsyms] [k] __call_rcu + 0,84% ksoftirqd/0 [kernel.kallsyms] [k] rt_garbage_collect + 0,84% ksoftirqd/0 [kernel.kallsyms] [k] fib_rules_lookup Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Cc: Jesper Dangaard Brouer Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 95e61596e605..f9ee7417f6a0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -377,7 +377,8 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, - sk->sk_protocol, inet_sk_flowi_flags(sk), + sk->sk_protocol, + inet_sk_flowi_flags(sk) & ~FLOWI_FLAG_PRECOW_METRICS, (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); security_req_classify_flow(req, flowi4_to_flowi(fl4)); -- cgit v1.2.3 From fff3269907897ee91406ece125795f53e722677e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 Jun 2012 01:47:50 +0000 Subject: tcp: reflect SYN queue_mapping into SYNACK packets While testing how linux behaves on SYNFLOOD attack on multiqueue device (ixgbe), I found that SYNACK messages were dropped at Qdisc level because we send them all on a single queue. Obvious choice is to reflect incoming SYN packet @queue_mapping to SYNACK packet. Under stress, my machine could only send 25.000 SYNACK per second (for 200.000 incoming SYN per second). NIC : ixgbe with 16 rx/tx queues. After patch, not a single SYNACK is dropped. Signed-off-by: Eric Dumazet Cc: Hans Schillstrom Cc: Jesper Dangaard Brouer Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 ++++++--- net/ipv6/tcp_ipv6.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a43b87dfe800..c8d28c433b2b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -824,7 +824,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp, + u16 queue_mapping) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -840,6 +841,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); + skb_set_queue_mapping(skb, queue_mapping); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); @@ -854,7 +856,7 @@ static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v4_send_synack(sk, NULL, req, rvp); + return tcp_v4_send_synack(sk, NULL, req, rvp, 0); } /* @@ -1422,7 +1424,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_synack = tcp_time_stamp; if (tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext) || + (struct request_values *)&tmp_ext, + skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 554d5999abc4..3a9aec29581a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -476,7 +476,8 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp, + u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -513,6 +514,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); fl6.daddr = treq->rmt_addr; + skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, &fl6, opt, np->tclass); err = net_xmit_eval(err); } @@ -528,7 +530,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v6_send_synack(sk, req, rvp); + return tcp_v6_send_synack(sk, req, rvp, 0); } static void tcp_v6_reqsk_destructor(struct request_sock *req) @@ -1213,7 +1215,8 @@ have_isn: security_inet_conn_request(sk, skb, req); if (tcp_v6_send_synack(sk, req, - (struct request_values *)&tmp_ext) || + (struct request_values *)&tmp_ext, + skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; -- cgit v1.2.3