From 73f156a6e8c1074ac6327e0abd1169e95eb66463 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Jun 2014 05:26:03 -0700 Subject: inetpeer: get rid of ip_id_count Ideally, we would need to generate IP ID using a per destination IP generator. linux kernels used inet_peer cache for this purpose, but this had a huge cost on servers disabling MTU discovery. 1) each inet_peer struct consumes 192 bytes 2) inetpeer cache uses a binary tree of inet_peer structs, with a nominal size of ~66000 elements under load. 3) lookups in this tree are hitting a lot of cache lines, as tree depth is about 20. 4) If server deals with many tcp flows, we have a high probability of not finding the inet_peer, allocating a fresh one, inserting it in the tree with same initial ip_id_count, (cf secure_ip_id()) 5) We garbage collect inet_peer aggressively. IP ID generation do not have to be 'perfect' Goal is trying to avoid duplicates in a short period of time, so that reassembly units have a chance to complete reassembly of fragments belonging to one message before receiving other fragments with a recycled ID. We simply use an array of generators, and a Jenkin hash using the dst IP as a key. ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs (it is only used from this file) secure_ip_id() and secure_ipv6_id() no longer are needed. Rename ip_select_ident_more() to ip_select_ident_segs() to avoid unnecessary decrement/increment of the number of segments. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inetpeer.h | 23 +++-------------------- include/net/ip.h | 40 +++++++++++++++++++++++----------------- include/net/ipv6.h | 2 -- include/net/secure_seq.h | 2 -- 4 files changed, 26 insertions(+), 41 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 6efe73c79c52..823ec7bb9c67 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -41,14 +41,13 @@ struct inet_peer { struct rcu_head gc_rcu; }; /* - * Once inet_peer is queued for deletion (refcnt == -1), following fields - * are not available: rid, ip_id_count + * Once inet_peer is queued for deletion (refcnt == -1), following field + * is not available: rid * We can share memory with rcu_head to help keep inet_peer small. */ union { struct { atomic_t rid; /* Frag reception counter */ - atomic_t ip_id_count; /* IP ID for the next packet */ }; struct rcu_head rcu; struct inet_peer *gc_next; @@ -165,7 +164,7 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); void inetpeer_invalidate_tree(struct inet_peer_base *); /* - * temporary check to make sure we dont access rid, ip_id_count, tcp_ts, + * temporary check to make sure we dont access rid, tcp_ts, * tcp_ts_stamp if no refcount is taken on inet_peer */ static inline void inet_peer_refcheck(const struct inet_peer *p) @@ -173,20 +172,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p) WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0); } - -/* can be called with or without local BH being disabled */ -static inline int inet_getid(struct inet_peer *p, int more) -{ - int old, new; - more++; - inet_peer_refcheck(p); - do { - old = atomic_read(&p->ip_id_count); - new = old + more; - if (!new) - new = 1; - } while (atomic_cmpxchg(&p->ip_id_count, old, new) != old); - return new; -} - #endif /* _NET_INETPEER_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 2e4947895d75..0e795df05ec9 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -309,9 +309,19 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) } } -void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); +#define IP_IDENTS_SZ 2048u +extern atomic_t *ip_idents; -static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk) +static inline u32 ip_idents_reserve(u32 hash, int segs) +{ + atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ; + + return atomic_add_return(segs, id_ptr) - segs; +} + +void __ip_select_ident(struct iphdr *iph, int segs); + +static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) { struct iphdr *iph = ip_hdr(skb); @@ -321,24 +331,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s * does not change, they drop every other packet in * a TCP stream using header compression. */ - iph->id = (sk && inet_sk(sk)->inet_daddr) ? - htons(inet_sk(sk)->inet_id++) : 0; - } else - __ip_select_ident(iph, dst, 0); -} - -static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more) -{ - struct iphdr *iph = ip_hdr(skb); - - if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { if (sk && inet_sk(sk)->inet_daddr) { iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += 1 + more; - } else + inet_sk(sk)->inet_id += segs; + } else { iph->id = 0; - } else - __ip_select_ident(iph, dst, more); + } + } else { + __ip_select_ident(iph, segs); + } +} + +static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk) +{ + ip_select_ident_segs(skb, sk, 1); } static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ba810d0546bc..574337fe72dd 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -668,8 +668,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); - int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index f257486f17be..3f36d45b714a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -3,8 +3,6 @@ #include -__u32 secure_ip_id(__be32 daddr); -__u32 secure_ipv6_id(const __be32 daddr[4]); u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -- cgit v1.2.3