From 39232973b779ab0c02cb6dcd8f819b7cb0fcd09a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jan 2012 15:22:32 -0500 Subject: ipv4/ipv6: Prepare for new route gateway semantics. In the future the ipv4/ipv6 route gateway will take on two types of values: 1) INADDR_ANY/IN6ADDR_ANY, for local network routes, and in this case the neighbour must be obtained using the destination address in ipv4/ipv6 header as the lookup key. 2) Everything else, the actual nexthop route address. So if the gateway is not inaddr-any we use it, otherwise we must use the packet's destination address. Signed-off-by: David S. Miller --- net/ipv6/route.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8c2e3ab58f2a..7d7f30697ead 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -121,9 +121,23 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) return p; } +static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr) +{ + struct in6_addr *p = &rt->rt6i_gateway; + + if (p->s6_addr32[0] | p->s6_addr32[1] | + p->s6_addr32[2] | p->s6_addr32[3]) + return (const void *) p; + return daddr; +} + static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); + struct rt6_info *rt = (struct rt6_info *) dst; + struct neighbour *n; + + daddr = choose_neigh_daddr(rt, daddr); + n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); if (n) return n; return neigh_create(&nd_tbl, daddr, dst->dev); -- cgit v1.2.3 From 1e2927b08160a14fff98e88e7a331d916aaa1d56 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jan 2012 15:23:21 -0500 Subject: ipv6: sit: Convert to dst_neigh_lookup() The only semantic difference is that we now hold a reference to the neighbour and thus have to release it. Signed-off-by: David S. Miller --- net/ipv6/sit.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 133768e52912..c4ffd1743528 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -680,9 +680,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, /* ISATAP (RFC4214) - must come before 6to4 */ if (dev->priv_flags & IFF_ISATAP) { struct neighbour *neigh = NULL; + bool do_tx_error = false; if (skb_dst(skb)) - neigh = dst_get_neighbour_noref(skb_dst(skb)); + neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { if (net_ratelimit()) @@ -697,6 +698,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, ipv6_addr_is_isatap(addr6)) dst = addr6->s6_addr32[3]; else + do_tx_error = true; + + neigh_release(neigh); + if (do_tx_error) goto tx_error; } @@ -705,9 +710,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!dst) { struct neighbour *neigh = NULL; + bool do_tx_error = false; if (skb_dst(skb)) - neigh = dst_get_neighbour_noref(skb_dst(skb)); + neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { if (net_ratelimit()) @@ -723,10 +729,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, addr_type = ipv6_addr_type(addr6); } - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) - goto tx_error_icmp; + if ((addr_type & IPV6_ADDR_COMPATv4) != 0) + dst = addr6->s6_addr32[3]; + else + do_tx_error = true; - dst = addr6->s6_addr32[3]; + neigh_release(neigh); + if (do_tx_error) + goto tx_error; } rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, -- cgit v1.2.3 From a7563f342db6490e66dbf2c8a50577a72a158c9a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 26 Jan 2012 16:29:16 -0500 Subject: ipv6: Use ipv6_addr_any() Suggested by YOSHIFUJI Hideaki. Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7d7f30697ead..92be12bb8d23 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -125,8 +125,7 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *da { struct in6_addr *p = &rt->rt6i_gateway; - if (p->s6_addr32[0] | p->s6_addr32[1] | - p->s6_addr32[2] | p->s6_addr32[3]) + if (!ipv6_addr_any(p)) return (const void *) p; return daddr; } -- cgit v1.2.3 From eb857186eb771998fc9ab4bfd398a6fedb5a295c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Jan 2012 15:07:56 -0800 Subject: ipv6: ndisc: Convert to dst_neigh_lookup() Now all code paths grab a local reference to the neigh, so if neigh is not NULL we unconditionally release it at the end. The old logic would only release if we didn't have a non-NULL 'rt'. Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d8f02ef88e59..c574ebce3fb5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1223,11 +1223,17 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); - if (rt) - neigh = dst_get_neighbour_noref(&rt->dst); - + if (rt) { + neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + if (!neigh) { + ND_PRINTK0(KERN_ERR + "ICMPv6 RA: %s() got default router without neighbour.\n", + __func__); + dst_release(&rt->dst); + return; + } + } if (rt && lifetime == 0) { - neigh_clone(neigh); ip6_del_rt(rt); rt = NULL; } @@ -1244,7 +1250,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = dst_get_neighbour_noref(&rt->dst); + neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); if (neigh == NULL) { ND_PRINTK0(KERN_ERR "ICMPv6 RA: %s() got default router without neighbour.\n", @@ -1411,7 +1417,7 @@ skip_routeinfo: out: if (rt) dst_release(&rt->dst); - else if (neigh) + if (neigh) neigh_release(neigh); } -- cgit v1.2.3 From 5339ab8b1dd82f14df168fb9bf59449f3e24b03d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Jan 2012 15:14:01 -0800 Subject: ipv6: fib: Convert fib6_age() to dst_neigh_lookup(). In this specific situation we know we are dealing with a gatewayed route and therefore rt6i_gateway is not going to be in6addr_any even in future interpretations. Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b82bcde53f7a..5b27fbcae346 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1552,11 +1552,20 @@ static int fib6_age(struct rt6_info *rt, void *arg) time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) { RT6_TRACE("aging clone %p\n", rt); return -1; - } else if ((rt->rt6i_flags & RTF_GATEWAY) && - (!(dst_get_neighbour_noref_raw(&rt->dst)->flags & NTF_ROUTER))) { - RT6_TRACE("purging route %p via non-router but gateway\n", - rt); - return -1; + } else if (rt->rt6i_flags & RTF_GATEWAY) { + struct neighbour *neigh; + __u8 neigh_flags = 0; + + neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); + if (neigh) { + neigh_flags = neigh->flags; + neigh_release(neigh); + } + if (neigh_flags & NTF_ROUTER) { + RT6_TRACE("purging route %p via non-router but gateway\n", + rt); + return -1; + } } gc_args.more++; } -- cgit v1.2.3 From 4991969a1027826c3db19dd3e600e145603e6928 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Jan 2012 15:30:48 -0800 Subject: ipv6: Remove neigh argument from ndisc_send_redirect() Instead, compute it as-needed inside of that function using dst_neigh_lookup(). Signed-off-by: David S. Miller --- include/net/ndisc.h | 1 - net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 12 ++++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ndisc.h b/include/net/ndisc.h index e3133c23980e..6f9c25a76cd1 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -133,7 +133,6 @@ extern void ndisc_send_rs(struct net_device *dev, const struct in6_addr *daddr); extern void ndisc_send_redirect(struct sk_buff *skb, - struct neighbour *neigh, const struct in6_addr *target); extern int ndisc_mc_map(const struct in6_addr *addr, char *buf, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d97e07183ce9..604809b89b67 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -486,7 +486,7 @@ int ip6_forward(struct sk_buff *skb) and by source (inside ndisc_send_redirect) */ if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) - ndisc_send_redirect(skb, n, target); + ndisc_send_redirect(skb, target); } else { int addrtype = ipv6_addr_type(&hdr->saddr); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index c574ebce3fb5..8d817018c188 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1512,8 +1512,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) } } -void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, - const struct in6_addr *target) +void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); @@ -1571,6 +1570,13 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, goto release; if (dev->addr_len) { + struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); + if (!neigh) { + ND_PRINTK2(KERN_WARNING + "ICMPv6 Redirect: no neigh for target address\n"); + goto release; + } + read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) { memcpy(ha_buf, neigh->ha, dev->addr_len); @@ -1579,6 +1585,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, len += ndisc_opt_addr_space(dev); } else read_unlock_bh(&neigh->lock); + + neigh_release(neigh); } rd_len = min_t(unsigned int, -- cgit v1.2.3 From c45a3dfb59c0f17bdbd294dd01efb2d7f99a32c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Jan 2012 15:32:19 -0800 Subject: ipv6: Eliminate dst_get_neighbour_noref() usage in ip6_forward(). It's only used to get at neigh->primary_key, which in this context is always going to be the same as rt->rt6i_gateway. Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 604809b89b67..7a98fc2a5d97 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -388,7 +388,6 @@ int ip6_forward(struct sk_buff *skb) struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); - struct neighbour *n; u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) @@ -463,8 +462,7 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - n = dst_get_neighbour_noref(dst); - if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) { + if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; @@ -474,8 +472,8 @@ int ip6_forward(struct sk_buff *skb) */ rt = (struct rt6_info *) dst; - if ((rt->rt6i_flags & RTF_GATEWAY)) - target = (struct in6_addr*)&n->primary_key; + if (rt->rt6i_flags & RTF_GATEWAY) + target = &rt->rt6i_gateway; else target = &hdr->daddr; -- cgit v1.2.3 From 5de658f878d49e952d7b3f3f26a396132829f513 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Jan 2012 04:29:24 +0000 Subject: ipv6: fix RFC5722 comment RFC5722 Section 4 was amended by Errata 3089 Our implementation did the right thing anyway... Signed-off-by: Eric Dumazet Cc: Nicolas Dichtel Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index b69fae76a6f1..9447bd69873a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -336,12 +336,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } found: - /* RFC5722, Section 4: - * When reassembling an IPv6 datagram, if + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if * one or more its constituent fragments is determined to be an * overlapping fragment, the entire datagram (and any constituent - * fragments, including those not yet received) MUST be silently - * discarded. + * fragments) MUST be silently discarded. */ /* Check for overlap with preceding fragment. */ -- cgit v1.2.3 From a2d91241a80ec9bbc5ab24b9a2c4d730b3fa5730 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 01:04:42 +0000 Subject: tcp: md5: remove obsolete md5_add() method We no longer use md5_add() method from struct tcp_sock_af_ops Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ---- net/ipv4/tcp_ipv4.c | 8 -------- net/ipv6/tcp_ipv6.c | 9 --------- 3 files changed, 21 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/tcp.h b/include/net/tcp.h index 0118ea999f67..3c903462adf5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1462,10 +1462,6 @@ struct tcp_sock_af_ops { const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb); - int (*md5_add) (struct sock *sk, - struct sock *addr_sk, - u8 *newkey, - u8 len); int (*md5_parse) (struct sock *sk, char __user *optval, int optlen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 337ba4cca052..345e24928fa6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -967,13 +967,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, } EXPORT_SYMBOL(tcp_v4_md5_do_add); -static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, - u8 *newkey, u8 newkeylen) -{ - return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr, - newkey, newkeylen); -} - int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1853,7 +1846,6 @@ EXPORT_SYMBOL(ipv4_specific); static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, - .md5_add = tcp_v4_md5_add_func, .md5_parse = tcp_v4_parse_md5_keys, }; #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3edd05ae4388..f37769ea6375 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -626,13 +626,6 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, return 0; } -static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk, - u8 *newkey, __u8 newkeylen) -{ - return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr, - newkey, newkeylen); -} - static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) { struct tcp_sock *tp = tcp_sk(sk); @@ -1898,7 +1891,6 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, - .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; #endif @@ -1930,7 +1922,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, - .md5_add = tcp_v6_md5_add_func, .md5_parse = tcp_v6_parse_md5_keys, }; #endif -- cgit v1.2.3 From a915da9b69273815527ccb3789421cb7027b545b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 05:18:33 +0000 Subject: tcp: md5: rcu conversion In order to be able to support proper RST messages for TCP MD5 flows, we need to allow access to MD5 keys without locking listener socket. This conversion is a nice cleanup, and shrinks size of timewait sockets by 80 bytes. IPv6 code reuses generic code found in IPv4 instead of duplicating it. Control path uses GFP_KERNEL allocations instead of GFP_ATOMIC. Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +- include/net/tcp.h | 61 ++++++------- net/ipv4/tcp_ipv4.c | 227 +++++++++++++++++++---------------------------- net/ipv4/tcp_minisocks.c | 12 +-- net/ipv6/tcp_ipv6.c | 173 +++--------------------------------- 5 files changed, 141 insertions(+), 335 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 46a85c9e1f25..c2025f159641 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -486,8 +486,7 @@ struct tcp_timewait_sock { u32 tw_ts_recent; long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG - u16 tw_md5_keylen; - u8 tw_md5_key[TCP_MD5SIG_MAXKEYLEN]; + struct tcp_md5sig_key *tw_md5_key; #endif /* Few sockets in timewait have cookies; in that case, then this * object holds a reference to them (tw_cookie_values->kref). diff --git a/include/net/tcp.h b/include/net/tcp.h index 3c903462adf5..10ae4c7b6b4f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1130,35 +1130,26 @@ static inline void tcp_clear_all_retrans_hints(struct tcp_sock *tp) /* MD5 Signature */ struct crypto_hash; +union tcp_md5_addr { + struct in_addr a4; +#if IS_ENABLED(CONFIG_IPV6) + struct in6_addr a6; +#endif +}; + /* - key database */ struct tcp_md5sig_key { - u8 *key; + struct hlist_node node; u8 keylen; -}; - -struct tcp4_md5sig_key { - struct tcp_md5sig_key base; - __be32 addr; -}; - -struct tcp6_md5sig_key { - struct tcp_md5sig_key base; -#if 0 - u32 scope_id; /* XXX */ -#endif - struct in6_addr addr; + u8 family; /* AF_INET or AF_INET6 */ + union tcp_md5_addr addr; + u8 key[TCP_MD5SIG_MAXKEYLEN]; + struct rcu_head rcu; }; /* - sock block */ struct tcp_md5sig_info { - struct tcp4_md5sig_key *keys4; -#if IS_ENABLED(CONFIG_IPV6) - struct tcp6_md5sig_key *keys6; - u32 entries6; - u32 alloced6; -#endif - u32 entries4; - u32 alloced4; + struct hlist_head head; }; /* - pseudo header */ @@ -1195,19 +1186,25 @@ extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb); -extern struct tcp_md5sig_key * tcp_v4_md5_lookup(struct sock *sk, - struct sock *addr_sk); -extern int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, u8 *newkey, - u8 newkeylen); -extern int tcp_v4_md5_do_del(struct sock *sk, __be32 addr); +extern int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, const u8 *newkey, + u8 newkeylen, gfp_t gfp); +extern int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, + int family); +extern struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, + struct sock *addr_sk); #ifdef CONFIG_TCP_MD5SIG -#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_keylen ? \ - &(struct tcp_md5sig_key) { \ - .key = (twsk)->tw_md5_key, \ - .keylen = (twsk)->tw_md5_keylen, \ - } : NULL) +extern struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, int family); +#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else +static inline struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, + int family) +{ + return NULL; +} #define tcp_twsk_md5_key(twsk) NULL #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 345e24928fa6..1d5fd82c5c08 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -90,16 +90,8 @@ EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, - __be32 addr); -static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th); -#else -static inline -struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) -{ - return NULL; -} #endif struct inet_hashinfo tcp_hashinfo; @@ -631,7 +623,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; + key = sk ? tcp_md5_do_lookup(sk, + (union tcp_md5_addr *)&ip_hdr(skb)->saddr, + AF_INET) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -759,7 +753,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, + AF_INET), inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos); } @@ -876,146 +871,124 @@ static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk, */ /* Find the Key structure for an address. */ -static struct tcp_md5sig_key * - tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, + const union tcp_md5_addr *addr, + int family) { struct tcp_sock *tp = tcp_sk(sk); - int i; + struct tcp_md5sig_key *key; + struct hlist_node *pos; + unsigned int size = sizeof(struct in_addr); - if (!tp->md5sig_info || !tp->md5sig_info->entries4) + if (!tp->md5sig_info) return NULL; - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) - return &tp->md5sig_info->keys4[i].base; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + size = sizeof(struct in6_addr); +#endif + hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + if (key->family != family) + continue; + if (!memcmp(&key->addr, addr, size)) + return key; } return NULL; } +EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, struct sock *addr_sk) { - return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); + union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr; + return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, struct request_sock *req) { - return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); + union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr; + return tcp_md5_do_lookup(sk, addr, AF_INET); } /* This can be called on a newly created socket, from other files */ -int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, - u8 *newkey, u8 newkeylen) +int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, + int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) { /* Add Key to the list */ struct tcp_md5sig_key *key; struct tcp_sock *tp = tcp_sk(sk); - struct tcp4_md5sig_key *keys; + struct tcp_md5sig_info *md5sig; - key = tcp_v4_md5_do_lookup(sk, addr); + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (key) { /* Pre-existing entry - just update that one. */ - kfree(key->key); - key->key = newkey; + memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; - } else { - struct tcp_md5sig_info *md5sig; - - if (!tp->md5sig_info) { - tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), - GFP_ATOMIC); - if (!tp->md5sig_info) { - kfree(newkey); - return -ENOMEM; - } - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } + return 0; + } - md5sig = tp->md5sig_info; - if (md5sig->entries4 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { - kfree(newkey); + md5sig = tp->md5sig_info; + if (!md5sig) { + md5sig = kmalloc(sizeof(*md5sig), gfp); + if (!md5sig) return -ENOMEM; - } - - if (md5sig->alloced4 == md5sig->entries4) { - keys = kmalloc((sizeof(*keys) * - (md5sig->entries4 + 1)), GFP_ATOMIC); - if (!keys) { - kfree(newkey); - if (md5sig->entries4 == 0) - tcp_free_md5sig_pool(); - return -ENOMEM; - } - if (md5sig->entries4) - memcpy(keys, md5sig->keys4, - sizeof(*keys) * md5sig->entries4); + sk_nocaps_add(sk, NETIF_F_GSO_MASK); + INIT_HLIST_HEAD(&md5sig->head); + tp->md5sig_info = md5sig; + } - /* Free old key list, and reference new one */ - kfree(md5sig->keys4); - md5sig->keys4 = keys; - md5sig->alloced4++; - } - md5sig->entries4++; - md5sig->keys4[md5sig->entries4 - 1].addr = addr; - md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; - md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; + key = kmalloc(sizeof(*key), gfp); + if (!key) + return -ENOMEM; + if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + kfree(key); + return -ENOMEM; } + + memcpy(key->key, newkey, newkeylen); + key->keylen = newkeylen; + key->family = family; + memcpy(&key->addr, addr, + (family == AF_INET6) ? sizeof(struct in6_addr) : + sizeof(struct in_addr)); + hlist_add_head_rcu(&key->node, &md5sig->head); return 0; } -EXPORT_SYMBOL(tcp_v4_md5_do_add); +EXPORT_SYMBOL(tcp_md5_do_add); -int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) +int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); - int i; - - for (i = 0; i < tp->md5sig_info->entries4; i++) { - if (tp->md5sig_info->keys4[i].addr == addr) { - /* Free the key */ - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4--; - - if (tp->md5sig_info->entries4 == 0) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; - tcp_free_md5sig_pool(); - } else if (tp->md5sig_info->entries4 != i) { - /* Need to do some manipulation */ - memmove(&tp->md5sig_info->keys4[i], - &tp->md5sig_info->keys4[i+1], - (tp->md5sig_info->entries4 - i) * - sizeof(struct tcp4_md5sig_key)); - } - return 0; - } - } - return -ENOENT; + struct tcp_md5sig_key *key; + + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); + if (!key) + return -ENOENT; + hlist_del_rcu(&key->node); + kfree_rcu(key, rcu); + if (hlist_empty(&tp->md5sig_info->head)) + tcp_free_md5sig_pool(); + return 0; } -EXPORT_SYMBOL(tcp_v4_md5_do_del); +EXPORT_SYMBOL(tcp_md5_do_del); -static void tcp_v4_clear_md5_list(struct sock *sk) +void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_key *key; + struct hlist_node *pos, *n; - /* Free each key, then the set of key keys, - * the crypto element, and then decrement our - * hold on the last resort crypto. - */ - if (tp->md5sig_info->entries4) { - int i; - for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4 = 0; + if (!hlist_empty(&tp->md5sig_info->head)) tcp_free_md5sig_pool(); - } - if (tp->md5sig_info->keys4) { - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; + hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_del_rcu(&key->node); + kfree_rcu(key, rcu); } } @@ -1024,7 +997,6 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, { struct tcp_md5sig cmd; struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; - u8 *newkey; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1038,29 +1010,16 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (!cmd.tcpm_key || !cmd.tcpm_keylen) { if (!tcp_sk(sk)->md5sig_info) return -ENOENT; - return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, + AF_INET); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (!tcp_sk(sk)->md5sig_info) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p; - - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - return -EINVAL; - - tp->md5sig_info = p; - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } - - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); - if (!newkey) - return -ENOMEM; - return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, - newkey, cmd.tcpm_keylen); + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, + AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, + GFP_KERNEL); } static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, @@ -1086,7 +1045,7 @@ static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); } -static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { struct tcp_md5sig_pool *hp; @@ -1186,7 +1145,8 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) int genhash; unsigned char newhash[16]; - hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); + hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr, + AF_INET); hash_location = tcp_parse_md5sig_option(th); /* We've parsed the options - do we have a hash? */ @@ -1474,7 +1434,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr); + key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr, + AF_INET); if (key != NULL) { /* * We're using one, so create a matching key @@ -1482,10 +1443,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * memory, then we end up not copying the key * across. Shucks. */ - char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); - if (newkey != NULL) - tcp_v4_md5_do_add(newsk, newinet->inet_daddr, - newkey, key->keylen); + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, + AF_INET, key->key, key->keylen, GFP_ATOMIC); sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif @@ -1934,7 +1893,7 @@ void tcp_v4_destroy_sock(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { - tcp_v4_clear_md5_list(sk); + tcp_clear_md5_list(sk); kfree(tp->md5sig_info); tp->md5sig_info = NULL; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 550e755747e0..3cabafb5cdd1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -359,13 +359,11 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) */ do { struct tcp_md5sig_key *key; - memset(tcptw->tw_md5_key, 0, sizeof(tcptw->tw_md5_key)); - tcptw->tw_md5_keylen = 0; + tcptw->tw_md5_key = NULL; key = tp->af_specific->md5_lookup(sk, sk); if (key != NULL) { - memcpy(&tcptw->tw_md5_key, key->key, key->keylen); - tcptw->tw_md5_keylen = key->keylen; - if (tcp_alloc_md5sig_pool(sk) == NULL) + tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); + if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL) BUG(); } } while (0); @@ -405,8 +403,10 @@ void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_keylen) + if (twsk->tw_md5_key) { tcp_free_md5sig_pool(); + kfree_rcu(twsk->tw_md5_key, rcu); + } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f37769ea6375..bec41f9a6413 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -540,19 +540,7 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req) static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, const struct in6_addr *addr) { - struct tcp_sock *tp = tcp_sk(sk); - int i; - - BUG_ON(tp == NULL); - - if (!tp->md5sig_info || !tp->md5sig_info->entries6) - return NULL; - - for (i = 0; i < tp->md5sig_info->entries6; i++) { - if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr)) - return &tp->md5sig_info->keys6[i].base; - } - return NULL; + return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); } static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, @@ -567,129 +555,11 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); } -static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer, - char *newkey, u8 newkeylen) -{ - /* Add key to the list */ - struct tcp_md5sig_key *key; - struct tcp_sock *tp = tcp_sk(sk); - struct tcp6_md5sig_key *keys; - - key = tcp_v6_md5_do_lookup(sk, peer); - if (key) { - /* modify existing entry - just update that one */ - kfree(key->key); - key->key = newkey; - key->keylen = newkeylen; - } else { - /* reallocate new list if current one is full. */ - if (!tp->md5sig_info) { - tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC); - if (!tp->md5sig_info) { - kfree(newkey); - return -ENOMEM; - } - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } - if (tp->md5sig_info->entries6 == 0 && - tcp_alloc_md5sig_pool(sk) == NULL) { - kfree(newkey); - return -ENOMEM; - } - if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) { - keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) * - (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC); - - if (!keys) { - kfree(newkey); - if (tp->md5sig_info->entries6 == 0) - tcp_free_md5sig_pool(); - return -ENOMEM; - } - - if (tp->md5sig_info->entries6) - memmove(keys, tp->md5sig_info->keys6, - (sizeof (tp->md5sig_info->keys6[0]) * - tp->md5sig_info->entries6)); - - kfree(tp->md5sig_info->keys6); - tp->md5sig_info->keys6 = keys; - tp->md5sig_info->alloced6++; - } - - tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer; - tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey; - tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen; - - tp->md5sig_info->entries6++; - } - return 0; -} - -static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer) -{ - struct tcp_sock *tp = tcp_sk(sk); - int i; - - for (i = 0; i < tp->md5sig_info->entries6; i++) { - if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) { - /* Free the key */ - kfree(tp->md5sig_info->keys6[i].base.key); - tp->md5sig_info->entries6--; - - if (tp->md5sig_info->entries6 == 0) { - kfree(tp->md5sig_info->keys6); - tp->md5sig_info->keys6 = NULL; - tp->md5sig_info->alloced6 = 0; - tcp_free_md5sig_pool(); - } else { - /* shrink the database */ - if (tp->md5sig_info->entries6 != i) - memmove(&tp->md5sig_info->keys6[i], - &tp->md5sig_info->keys6[i+1], - (tp->md5sig_info->entries6 - i) - * sizeof (tp->md5sig_info->keys6[0])); - } - return 0; - } - } - return -ENOENT; -} - -static void tcp_v6_clear_md5_list (struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - int i; - - if (tp->md5sig_info->entries6) { - for (i = 0; i < tp->md5sig_info->entries6; i++) - kfree(tp->md5sig_info->keys6[i].base.key); - tp->md5sig_info->entries6 = 0; - tcp_free_md5sig_pool(); - } - - kfree(tp->md5sig_info->keys6); - tp->md5sig_info->keys6 = NULL; - tp->md5sig_info->alloced6 = 0; - - if (tp->md5sig_info->entries4) { - for (i = 0; i < tp->md5sig_info->entries4; i++) - kfree(tp->md5sig_info->keys4[i].base.key); - tp->md5sig_info->entries4 = 0; - tcp_free_md5sig_pool(); - } - - kfree(tp->md5sig_info->keys4); - tp->md5sig_info->keys4 = NULL; - tp->md5sig_info->alloced4 = 0; -} - static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; - u8 *newkey; if (optlen < sizeof(cmd)) return -EINVAL; @@ -704,33 +574,21 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, if (!tcp_sk(sk)->md5sig_info) return -ENOENT; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]); - return tcp_v6_md5_do_del(sk, &sin6->sin6_addr); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + AF_INET); + return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, + AF_INET6); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (!tcp_sk(sk)->md5sig_info) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_info *p; - - p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL); - if (!p) - return -ENOMEM; - - tp->md5sig_info = p; - sk_nocaps_add(sk, NETIF_F_GSO_MASK); - } + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); - newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); - if (!newkey) - return -ENOMEM; - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { - return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3], - newkey, cmd.tcpm_keylen); - } - return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen); + return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, + AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, @@ -1503,10 +1361,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * memory, then we end up not copying the key * across. Shucks. */ - char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); - if (newkey != NULL) - tcp_v6_md5_do_add(newsk, &newnp->daddr, - newkey, key->keylen); + tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, + AF_INET6, key->key, key->keylen, GFP_ATOMIC); } #endif @@ -1995,11 +1851,6 @@ static int tcp_v6_init_sock(struct sock *sk) static void tcp_v6_destroy_sock(struct sock *sk) { -#ifdef CONFIG_TCP_MD5SIG - /* Clean up the MD5 key list */ - if (tcp_sk(sk)->md5sig_info) - tcp_v6_clear_md5_list(sk); -#endif tcp_v4_destroy_sock(sk); inet6_destroy_sock(sk); } -- cgit v1.2.3 From a8afca032998850ec63e83d555cdcf0eb5680cd6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 31 Jan 2012 18:45:40 +0000 Subject: tcp: md5: protects md5sig_info with RCU This patch makes sure we use appropriate memory barriers before publishing tp->md5sig_info, allowing tcp_md5_do_lookup() being used from tcp_v4_send_reset() without holding socket lock (upcoming patch from Shawn Lu) Note we also need to respect rcu grace period before its freeing, since we can free socket without this grace period thanks to SLAB_DESTROY_BY_RCU Signed-off-by: Eric Dumazet Cc: Shawn Lu Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 32 ++++++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 2 -- 4 files changed, 22 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c2025f159641..115389e9b945 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -463,7 +463,7 @@ struct tcp_sock { const struct tcp_sock_af_ops *af_specific; /* TCP MD5 Signature Option information */ - struct tcp_md5sig_info *md5sig_info; + struct tcp_md5sig_info __rcu *md5sig_info; #endif /* When the cookie options are generated and exchanged, then this diff --git a/include/net/tcp.h b/include/net/tcp.h index 10ae4c7b6b4f..78880ba0f560 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1150,6 +1150,7 @@ struct tcp_md5sig_key { /* - sock block */ struct tcp_md5sig_info { struct hlist_head head; + struct rcu_head rcu; }; /* - pseudo header */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index da5d3226771b..567cca9b30df 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -879,14 +879,18 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, struct tcp_md5sig_key *key; struct hlist_node *pos; unsigned int size = sizeof(struct in_addr); + struct tcp_md5sig_info *md5sig; - if (!tp->md5sig_info) + /* caller either holds rcu_read_lock() or socket lock */ + md5sig = rcu_dereference_check(tp->md5sig_info, + sock_owned_by_user(sk)); + if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) size = sizeof(struct in6_addr); #endif - hlist_for_each_entry_rcu(key, pos, &tp->md5sig_info->head, node) { + hlist_for_each_entry_rcu(key, pos, &md5sig->head, node) { if (key->family != family) continue; if (!memcmp(&key->addr, addr, size)) @@ -932,7 +936,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return 0; } - md5sig = tp->md5sig_info; + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) @@ -940,7 +945,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, sk_nocaps_add(sk, NETIF_F_GSO_MASK); INIT_HLIST_HEAD(&md5sig->head); - tp->md5sig_info = md5sig; + rcu_assign_pointer(tp->md5sig_info, md5sig); } key = sock_kmalloc(sk, sizeof(*key), gfp); @@ -966,6 +971,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; + struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&addr, AF_INET); if (!key) @@ -973,7 +979,9 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - if (hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, + sock_owned_by_user(sk)); + if (hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); return 0; } @@ -984,10 +992,13 @@ void tcp_clear_md5_list(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; struct hlist_node *pos, *n; + struct tcp_md5sig_info *md5sig; - if (!hlist_empty(&tp->md5sig_info->head)) + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); + + if (!hlist_empty(&md5sig->head)) tcp_free_md5sig_pool(); - hlist_for_each_entry_safe(key, pos, n, &tp->md5sig_info->head, node) { + hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); @@ -1009,12 +1020,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; - if (!cmd.tcpm_key || !cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; + if (!cmd.tcpm_key || !cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, AF_INET); - } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; @@ -1896,7 +1904,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { tcp_clear_md5_list(sk); - kfree(tp->md5sig_info); + kfree_rcu(tp->md5sig_info, rcu); tp->md5sig_info = NULL; } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bec41f9a6413..c25018106ef2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -571,8 +571,6 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, return -EINVAL; if (!cmd.tcpm_keylen) { - if (!tcp_sk(sk)->md5sig_info) - return -ENOENT; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET); -- cgit v1.2.3 From 5b11b2e4bdef20e839d90dce96c5bbeafaf9616c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 Jan 2012 21:45:26 +0000 Subject: xfrm6: remove unneeded NULL check in __xfrm6_output() We don't check for NULL consistently in __xfrm6_output(). If "x" were NULL here it would lead to an OOPs later. I asked Steffen Klassert about this and he suggested that we remove the NULL check. On 10/29/11, Steffen Klassert wrote: >> net/ipv6/xfrm6_output.c >> 148 >> 149 if ((x && x->props.mode == XFRM_MODE_TUNNEL) && >> ^ > > x can't be null here. It would be a bug if __xfrm6_output() is called > without a xfrm_state attached to the skb. I think we can just remove > this null check. Cc: Steffen Klassert Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv6/xfrm6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4eeff89c1aaa..8755a3079d0f 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -146,7 +146,7 @@ static int __xfrm6_output(struct sk_buff *skb) return -EMSGSIZE; } - if ((x && x->props.mode == XFRM_MODE_TUNNEL) && + if (x->props.mode == XFRM_MODE_TUNNEL && ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); -- cgit v1.2.3 From 658ddaaf6694adf63f67451dec9ddeb87a7cb2d7 Mon Sep 17 00:00:00 2001 From: Shawn Lu Date: Tue, 31 Jan 2012 22:35:48 +0000 Subject: tcp: md5: RST: getting md5 key from listener TCP RST mechanism is broken in TCP md5(RFC2385). When connection is gone, md5 key is lost, sending RST without md5 hash is deem to ignored by peer. This can be a problem since RST help protocal like bgp to fast recove from peer crash. In most case, users of tcp md5, such as bgp and ldp, have listener on both sides to accept connection from peer. md5 keys for peers are saved in listening socket. There are two cases in finding md5 key when connection is lost: 1.Passive receive RST: The message is send to well known port, tcp will associate it with listner. md5 key is gotten from listener. 2.Active receive RST (no sock): The message is send to ative side, there is no socket associated with the message. In this case, finding listener from source port, then find md5 key from listener. we are not loosing sercuriy here: packet is checked with md5 hash. No RST is generated if md5 hash doesn't match or no md5 key can be found. Signed-off-by: Shawn Lu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 45 ++++++++++++++++++++++++++++++++++++++++++--- net/ipv6/tcp_ipv6.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 567cca9b30df..90e47931e217 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -593,6 +593,10 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; + const __u8 *hash_location = NULL; + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; #endif struct net *net; @@ -623,9 +627,36 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_md5_do_lookup(sk, - (union tcp_md5_addr *)&ip_hdr(skb)->saddr, - AF_INET) : NULL; + hash_location = tcp_parse_md5sig_option(th); + if (!sk && hash_location) { + /* + * active side is lost. Try to find listening socket through + * source port, and then find md5 key through listening socket. + * we are not loose security here: + * Incoming packet is checked with md5 hash with finding key, + * no RST generated if md5 hash doesn't match. + */ + sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev), + &tcp_hashinfo, ip_hdr(skb)->daddr, + ntohs(th->source), inet_iif(skb)); + /* don't send rst if it can't find key */ + if (!sk1) + return; + rcu_read_lock(); + key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, AF_INET); + if (!key) + goto release_sk1; + + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) + goto release_sk1; + } else { + key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *) + &ip_hdr(skb)->saddr, + AF_INET) : NULL; + } + if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -653,6 +684,14 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); + +#ifdef CONFIG_TCP_MD5SIG +release_sk1: + if (sk1) { + rcu_read_unlock(); + sock_put(sk1); + } +#endif } /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c25018106ef2..d16414cb3421 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -923,6 +923,13 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; +#ifdef CONFIG_TCP_MD5SIG + const __u8 *hash_location = NULL; + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -931,8 +938,32 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) return; #ifdef CONFIG_TCP_MD5SIG - if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr); + hash_location = tcp_parse_md5sig_option(th); + if (!sk && hash_location) { + /* + * active side is lost. Try to find listening socket through + * source port, and then find md5 key through listening socket. + * we are not loose security here: + * Incoming packet is checked with md5 hash with finding key, + * no RST generated if md5 hash doesn't match. + */ + sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), + &tcp_hashinfo, &ipv6h->daddr, + ntohs(th->source), inet6_iif(skb)); + if (!sk1) + return; + + rcu_read_lock(); + key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr); + if (!key) + goto release_sk1; + + genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb); + if (genhash || memcmp(hash_location, newhash, 16) != 0) + goto release_sk1; + } else { + key = sk ? tcp_v6_md5_do_lookup(sk, &ipv6h->saddr) : NULL; + } #endif if (th->ack) @@ -942,6 +973,14 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) (th->doff << 2); tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1, 0); + +#ifdef CONFIG_TCP_MD5SIG +release_sk1: + if (sk1) { + rcu_read_unlock(); + sock_put(sk1); + } +#endif } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, -- cgit v1.2.3 From f79d52c254e4e2cef3da64dc02ade3bc8f10c539 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Feb 2012 16:14:17 -0500 Subject: ipv6: Remove never used function inet6_ac_check(). It went from unused, to commented out, and never changing after that. Just get rid of it, if someone wants it they can unearth it from the history. Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 - net/ipv6/anycast.c | 29 ----------------------------- 2 files changed, 30 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f68dce2d8d88..757a17638b1b 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -160,7 +160,6 @@ extern void addrconf_prefix_rcv(struct net_device *dev, extern int ipv6_sock_ac_join(struct sock *sk,int ifindex, const struct in6_addr *addr); extern int ipv6_sock_ac_drop(struct sock *sk,int ifindex, const struct in6_addr *addr); extern void ipv6_sock_ac_close(struct sock *sk); -extern int inet6_ac_check(struct sock *sk, const struct in6_addr *addr, int ifindex); extern int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr); extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 59402b4637f9..db00d27ffb16 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -211,35 +211,6 @@ void ipv6_sock_ac_close(struct sock *sk) rcu_read_unlock(); } -#if 0 -/* The function is not used, which is funny. Apparently, author - * supposed to use it to filter out datagrams inside udp/raw but forgot. - * - * It is OK, anycasts are not special comparing to delivery to unicasts. - */ - -int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex) -{ - struct ipv6_ac_socklist *pac; - struct ipv6_pinfo *np = inet6_sk(sk); - int found; - - found = 0; - read_lock(&ipv6_sk_ac_lock); - for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) { - if (ifindex && pac->acl_ifindex != ifindex) - continue; - found = ipv6_addr_equal(&pac->acl_addr, addr); - if (found) - break; - } - read_unlock(&ipv6_sk_ac_lock); - - return found; -} - -#endif - static void aca_put(struct ifacaddr6 *ac) { if (atomic_dec_and_test(&ac->aca_refcnt)) { -- cgit v1.2.3 From c4062dfc425e94290ac427a98d6b4721dd2bc91f Mon Sep 17 00:00:00 2001 From: "Erich E. Hoover" Date: Wed, 8 Feb 2012 09:11:08 +0000 Subject: ipv6: Implement IPV6_UNICAST_IF socket option. The IPV6_UNICAST_IF feature is the IPv6 compliment to IP_UNICAST_IF. Signed-off-by: Erich E. Hoover Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/linux/ipv6.h | 1 + net/ipv6/icmp.c | 4 ++++ net/ipv6/ipv6_sockglue.c | 34 ++++++++++++++++++++++++++++++++++ net/ipv6/raw.c | 2 ++ net/ipv6/udp.c | 3 ++- 6 files changed, 44 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/include/linux/in6.h b/include/linux/in6.h index 097a34b55560..5c83d9e3eb8f 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -271,6 +271,7 @@ struct in6_flowlabel_req { #define IPV6_ORIGDSTADDR 74 #define IPV6_RECVORIGDSTADDR IPV6_ORIGDSTADDR #define IPV6_TRANSPARENT 75 +#define IPV6_UNICAST_IF 76 /* * Multicast Routing: diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6318268dcaf5..743a16a41040 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -324,6 +324,7 @@ struct ipv6_pinfo { __unused_2:6; __s16 mcast_hops:9; #endif + int ucast_oif; int mcast_oif; /* pktoption flags */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 01d46bff63c3..af88934e4d79 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -468,6 +468,8 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) @@ -553,6 +555,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; err = ip6_dst_lookup(sk, &dst, &fl6); if (err) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 18a2719003c3..6d6b65fdaa1a 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -516,6 +516,36 @@ done: retv = 0; break; + case IPV6_UNICAST_IF: + { + struct net_device *dev = NULL; + int ifindex; + + if (optlen != sizeof(int)) + goto e_inval; + + ifindex = (__force int)ntohl((__force __be32)val); + if (ifindex == 0) { + np->ucast_oif = 0; + retv = 0; + break; + } + + dev = dev_get_by_index(net, ifindex); + retv = -EADDRNOTAVAIL; + if (!dev) + break; + dev_put(dev); + + retv = -EINVAL; + if (sk->sk_bound_dev_if) + break; + + np->ucast_oif = ifindex; + retv = 0; + break; + } + case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) break; @@ -1160,6 +1190,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->mcast_oif; break; + case IPV6_UNICAST_IF: + val = (__force int)htonl((__u32) np->ucast_oif); + break; + case IPV6_MTU_DISCOVER: val = np->pmtudisc; break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d02f7e4dd611..5bddea778840 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -856,6 +856,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4f96b5c63685..8aebf8f90436 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1130,7 +1130,8 @@ do_udp_sendmsg: if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) { fl6.flowi6_oif = np->mcast_oif; connected = 0; - } + } else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); -- cgit v1.2.3 From 7a3198a89722ad9521d22b05938d357eac7460fa Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 9 Feb 2012 09:34:41 +0000 Subject: ipv6: helper function to get tclass Implement helper inline function to get traffic class from IPv6 header. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +++++ net/ipv6/datagram.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 743a16a41040..4847a64d3c0a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -233,6 +233,11 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) return (struct ipv6hdr *)skb_transport_header(skb); } +static inline __u8 ipv6_tclass(const struct ipv6hdr *iph) +{ + return (ntohl(*(__be32 *)iph) >> 20) & 0xff; +} + /* This structure contains results of exthdrs parsing as offsets from skb->nh. diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 251e7cd75e89..76832c8dc89d 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -485,7 +485,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) } if (np->rxopt.bits.rxtclass) { - int tclass = (ntohl(*(__be32 *)ipv6_hdr(skb)) >> 20) & 0xff; + int tclass = ipv6_tclass(ipv6_hdr(skb)); put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } -- cgit v1.2.3 From 4c507d2897bd9be810b3403ade73b04cf6fdfd4a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 9 Feb 2012 09:35:49 +0000 Subject: net: implement IP_RECVTOS for IP_PKTOPTIONS Currently, it is not easily possible to get TOS/DSCP value of packets from an incoming TCP stream. The mechanism is there, IP_PKTOPTIONS getsockopt with IP_RECVTOS set, the same way as incoming TTL can be queried. This is not actually implemented for TOS, though. This patch adds this functionality, both for IPv4 (IP_PKTOPTIONS) and IPv6 (IPV6_2292PKTOPTIONS). For IPv4, like in the IP_RECVTTL case, the value of the TOS field is stored from the other party's ACK. This is needed for proxies which require DSCP transparency. One such example is at http://zph.bratcheda.org/. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/inet_sock.h | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/ip_sockglue.c | 4 ++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/ipv6_sockglue.c | 4 ++++ net/ipv6/tcp_ipv6.c | 4 ++++ 8 files changed, 17 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4847a64d3c0a..8260ef779762 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -366,7 +366,7 @@ struct ipv6_pinfo { dontfrag:1; __u8 min_hopcount; __u8 tclass; - __u8 padding; + __u8 rcv_tclass; __u32 dst_cookie; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 022f772c0ebe..ae17e1352d7e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -168,6 +168,7 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; + __u8 rcv_tos; int uc_index; int mc_index; __be32 mc_addr; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f7b5670744f0..e588a34e85c2 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -381,6 +381,7 @@ lookup_protocol: inet->mc_all = 1; inet->mc_index = 0; inet->mc_list = NULL; + inet->rcv_tos = 0; sk_refcnt_debug_inc(sk); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 9125529dab95..ca50d9f9f8c1 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1289,6 +1289,10 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, int hlim = inet->mc_ttl; put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); } + if (inet->cmsg_flags & IP_CMSG_TOS) { + int tos = inet->rcv_tos; + put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); + } len -= msg.msg_controllen; return put_user(len, optlen); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4d6f81c818dc..94abee8cf563 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1463,6 +1463,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 273f48d1df2e..5605f9dca87e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -214,6 +214,7 @@ lookup_protocol: inet->mc_ttl = 1; inet->mc_index = 0; inet->mc_list = NULL; + inet->rcv_tos = 0; if (ipv4_config.no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 6d6b65fdaa1a..63dd1f89ed7d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1017,6 +1017,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxtclass) { + int tclass = np->rcv_tclass; + put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); + } if (np->rxopt.bits.rxoinfo) { struct in6_pktinfo src_info; src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d16414cb3421..12c6ece67f39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1282,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1360,6 +1361,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1562,6 +1564,8 @@ ipv6_pktoptions: np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + if (np->rxopt.bits.rxtclass) + np->rcv_tclass = ipv6_tclass(ipv6_hdr(skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); -- cgit v1.2.3 From 3f518bf745cbd6007d8069100fb9cb09e960c872 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:30:58 +0000 Subject: datagram: Add offset argument to __skb_recv_datagram This one is only considered for MSG_PEEK flag and the value pointed by it specifies where to start peeking bytes from. If the offset happens to point into the middle of the returned skb, the offset within this skb is put back to this very argument. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/datagram.c | 21 +++++++++++++-------- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 4 files changed, 18 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b7317ff297f..f3cf43de3c2a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2046,7 +2046,7 @@ static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) extern struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err); + int *peeked, int *off, int *err); extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); extern unsigned int datagram_poll(struct file *file, struct socket *sock, diff --git a/net/core/datagram.c b/net/core/datagram.c index 6f54d0a17f8e..d3cf12f62c8f 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -132,6 +132,8 @@ out_noerr: * __skb_recv_datagram - Receive a datagram skbuff * @sk: socket * @flags: MSG_ flags + * @off: an offset in bytes to peek skb from. Returns an offset + * within an skb where data actually starts * @peeked: returns non-zero if this packet has been seen before * @err: error code returned * @@ -158,7 +160,7 @@ out_noerr: * the standard around please. */ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, - int *peeked, int *err) + int *peeked, int *off, int *err) { struct sk_buff *skb; long timeo; @@ -183,19 +185,22 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, struct sk_buff_head *queue = &sk->sk_receive_queue; spin_lock_irqsave(&queue->lock, cpu_flags); - skb = skb_peek(queue); - if (skb) { + skb_queue_walk(queue, skb) { *peeked = skb->peeked; if (flags & MSG_PEEK) { + if (*off >= skb->len) { + *off -= skb->len; + continue; + } skb->peeked = 1; atomic_inc(&skb->users); } else __skb_unlink(skb, queue); - } - spin_unlock_irqrestore(&queue->lock, cpu_flags); - if (skb) + spin_unlock_irqrestore(&queue->lock, cpu_flags); return skb; + } + spin_unlock_irqrestore(&queue->lock, cpu_flags); /* User doesn't want to wait */ error = -EAGAIN; @@ -215,10 +220,10 @@ EXPORT_SYMBOL(__skb_recv_datagram); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err) { - int peeked; + int peeked, off = 0; return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, err); + &peeked, &off, err); } EXPORT_SYMBOL(skb_recv_datagram); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cd99f1a0f59f..7c41ab84e72e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1167,7 +1167,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; unsigned int ulen, copied; - int peeked; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); bool slow; @@ -1183,7 +1183,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8aebf8f90436..37b0699e95e5 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -342,7 +342,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; unsigned int ulen, copied; - int peeked; + int peeked, off = 0; int err; int is_udplite = IS_UDPLITE(sk); int is_udp4; @@ -359,7 +359,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); + &peeked, &off, &err); if (!skb) goto out; -- cgit v1.2.3 From 6939c33a757bd006c5e0b8b5fd429fc587a4d0f4 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 10 Feb 2012 23:10:52 +0100 Subject: netfilter: merge ipt_LOG and ip6_LOG into xt_LOG ipt_LOG and ip6_LOG have a lot of common code, merge them to reduce duplicate code. Signed-off-by: Richard Weinberger Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_LOG.h | 19 + include/linux/netfilter_ipv4/ipt_LOG.h | 2 + include/linux/netfilter_ipv6/ip6t_LOG.h | 2 + net/ipv4/netfilter/Kconfig | 9 - net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_LOG.c | 516 ------------------ net/ipv6/netfilter/Kconfig | 9 - net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_LOG.c | 527 ------------------ net/netfilter/Kconfig | 9 + net/netfilter/Makefile | 1 + net/netfilter/xt_LOG.c | 921 ++++++++++++++++++++++++++++++++ 13 files changed, 955 insertions(+), 1063 deletions(-) create mode 100644 include/linux/netfilter/xt_LOG.h delete mode 100644 net/ipv4/netfilter/ipt_LOG.c delete mode 100644 net/ipv6/netfilter/ip6t_LOG.c create mode 100644 net/netfilter/xt_LOG.c (limited to 'net/ipv6') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index e144f54185c0..aaa72aaa21de 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -22,6 +22,7 @@ header-y += xt_CT.h header-y += xt_DSCP.h header-y += xt_IDLETIMER.h header-y += xt_LED.h +header-y += xt_LOG.h header-y += xt_MARK.h header-y += xt_nfacct.h header-y += xt_NFLOG.h diff --git a/include/linux/netfilter/xt_LOG.h b/include/linux/netfilter/xt_LOG.h new file mode 100644 index 000000000000..cac079095305 --- /dev/null +++ b/include/linux/netfilter/xt_LOG.h @@ -0,0 +1,19 @@ +#ifndef _XT_LOG_H +#define _XT_LOG_H + +/* make sure not to change this without changing nf_log.h:NF_LOG_* (!) */ +#define XT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define XT_LOG_TCPOPT 0x02 /* Log TCP options */ +#define XT_LOG_IPOPT 0x04 /* Log IP options */ +#define XT_LOG_UID 0x08 /* Log UID owning local socket */ +#define XT_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define XT_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define XT_LOG_MASK 0x2f + +struct xt_log_info { + unsigned char level; + unsigned char logflags; + char prefix[30]; +}; + +#endif /* _XT_LOG_H */ diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index dcdbadf9fd4a..5d8152077d71 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -1,6 +1,8 @@ #ifndef _IPT_LOG_H #define _IPT_LOG_H +#warning "Please update iptables, this file will be removed soon!" + /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 9dd5579e02ec..3dd0bc4e0735 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -1,6 +1,8 @@ #ifndef _IP6T_LOG_H #define _IP6T_LOG_H +#warning "Please update iptables, this file will be removed soon!" + /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 74dfc9e5211f..fcc543cd987a 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -123,15 +123,6 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_LOG - tristate "LOG target support" - default m if NETFILTER_ADVANCED=n - help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_ULOG tristate "ULOG target support" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 213a462b739b..240b68469a7a 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -54,7 +54,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c deleted file mode 100644 index d76d6c9ed946..000000000000 --- a/net/ipv4/netfilter/ipt_LOG.c +++ /dev/null @@ -1,516 +0,0 @@ -/* - * This is a module which is used for logging packets. - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team "); -MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog"); - -/* One level of recursion won't kill us */ -static void dump_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, - unsigned int iphoff) -{ - struct iphdr _iph; - const struct iphdr *ih; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Important fields: - * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ - /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - sb_add(m, "SRC=%pI4 DST=%pI4 ", - &ih->saddr, &ih->daddr); - - /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ - sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, - ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); - - /* Max length: 6 "CE DF MF " */ - if (ntohs(ih->frag_off) & IP_CE) - sb_add(m, "CE "); - if (ntohs(ih->frag_off) & IP_DF) - sb_add(m, "DF "); - if (ntohs(ih->frag_off) & IP_MF) - sb_add(m, "MF "); - - /* Max length: 11 "FRAG:65535 " */ - if (ntohs(ih->frag_off) & IP_OFFSET) - sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - - if ((logflags & IPT_LOG_IPOPT) && - ih->ihl * 4 > sizeof(struct iphdr)) { - const unsigned char *op; - unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; - unsigned int i, optsize; - - optsize = ih->ihl * 4 - sizeof(struct iphdr); - op = skb_header_pointer(skb, iphoff+sizeof(_iph), - optsize, _opt); - if (op == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - - switch (ih->protocol) { - case IPPROTO_TCP: { - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - sb_add(m, "PROTO=TCP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u ", - ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & IPT_LOG_TCPSEQ) - sb_add(m, "SEQ=%u ACK=%u ", - ntohl(th->seq), ntohl(th->ack_seq)); - /* Max length: 13 "WINDOW=65535 " */ - sb_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3F " */ - sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - sb_add(m, "CWR "); - if (th->ece) - sb_add(m, "ECE "); - if (th->urg) - sb_add(m, "URG "); - if (th->ack) - sb_add(m, "ACK "); - if (th->psh) - sb_add(m, "PSH "); - if (th->rst) - sb_add(m, "RST "); - if (th->syn) - sb_add(m, "SYN "); - if (th->fin) - sb_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & IPT_LOG_TCPOPT) && - th->doff * 4 > sizeof(struct tcphdr)) { - unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; - const unsigned char *op; - unsigned int i, optsize; - - optsize = th->doff * 4 - sizeof(struct tcphdr); - op = skb_header_pointer(skb, - iphoff+ih->ihl*4+sizeof(_tcph), - optsize, _opt); - if (op == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i = 0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - break; - } - case IPPROTO_UDP: - case IPPROTO_UDPLITE: { - struct udphdr _udph; - const struct udphdr *uh; - - if (ih->protocol == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - sb_add(m, "PROTO=UDP " ); - else /* Max length: 14 "PROTO=UDPLITE " */ - sb_add(m, "PROTO=UDPLITE "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_udph), &_udph); - if (uh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u LEN=%u ", - ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - break; - } - case IPPROTO_ICMP: { - struct icmphdr _icmph; - const struct icmphdr *ich; - static const size_t required_len[NR_ICMP_TYPES+1] - = { [ICMP_ECHOREPLY] = 4, - [ICMP_DEST_UNREACH] - = 8 + sizeof(struct iphdr), - [ICMP_SOURCE_QUENCH] - = 8 + sizeof(struct iphdr), - [ICMP_REDIRECT] - = 8 + sizeof(struct iphdr), - [ICMP_ECHO] = 4, - [ICMP_TIME_EXCEEDED] - = 8 + sizeof(struct iphdr), - [ICMP_PARAMETERPROB] - = 8 + sizeof(struct iphdr), - [ICMP_TIMESTAMP] = 20, - [ICMP_TIMESTAMPREPLY] = 20, - [ICMP_ADDRESS] = 12, - [ICMP_ADDRESSREPLY] = 12 }; - - /* Max length: 11 "PROTO=ICMP " */ - sb_add(m, "PROTO=ICMP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_icmph), &_icmph); - if (ich == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES && - required_len[ich->type] && - skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - switch (ich->type) { - case ICMP_ECHOREPLY: - case ICMP_ECHO: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ich->un.echo.id), - ntohs(ich->un.echo.sequence)); - break; - - case ICMP_PARAMETERPROB: - /* Max length: 14 "PARAMETER=255 " */ - sb_add(m, "PARAMETER=%u ", - ntohl(ich->un.gateway) >> 24); - break; - case ICMP_REDIRECT: - /* Max length: 24 "GATEWAY=255.255.255.255 " */ - sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); - /* Fall through */ - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - /* Max length: 3+maxlen */ - if (!iphoff) { /* Only recurse once. */ - sb_add(m, "["); - dump_packet(m, info, skb, - iphoff + ih->ihl*4+sizeof(_icmph)); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH && - ich->code == ICMP_FRAG_NEEDED) - sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); - } - break; - } - /* Max Length */ - case IPPROTO_AH: { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 9 "PROTO=AH " */ - sb_add(m, "PROTO=AH "); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ah = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_ahdr), &_ahdr); - if (ah == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - break; - } - case IPPROTO_ESP: { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 10 "PROTO=ESP " */ - sb_add(m, "PROTO=ESP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - eh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_esph), &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - sb_add(m, "SPI=0x%x ", ntohl(eh->spi)); - break; - } - /* Max length: 10 "PROTO 255 " */ - default: - sb_add(m, "PROTO=%u ", ih->protocol); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!iphoff && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); - - /* Proto Max log string length */ - /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ - /* UDP: 10+max(25,20) = 35 */ - /* UDPLITE: 14+max(25,20) = 39 */ - /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ - /* ESP: 10+max(25)+15 = 50 */ - /* AH: 9+max(25)+15 = 49 */ - /* unknown: 10 */ - - /* (ICMP allows recursion one level deep) */ - /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ -} - -static void dump_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & IPT_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int i; - - sb_add(m, "%02x", *p++); - for (i = 1; i < dev->hard_header_len; i++, p++) - sb_add(m, ":%02x", *p); - } - sb_add(m, " "); -} - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 5, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void -ipt_log_packet(u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, - in ? in->name : "", - out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - const struct net_device *physindev; - const struct net_device *physoutdev; - - physindev = skb->nf_bridge->physindev; - if (physindev && in != physindev) - sb_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = skb->nf_bridge->physoutdev; - if (physoutdev && out != physoutdev) - sb_add(m, "PHYSOUT=%s ", physoutdev->name); - } -#endif - - if (in != NULL) - dump_mac_header(m, loginfo, skb); - - dump_packet(m, loginfo, skb, 0); - - sb_close(m); -} - -static unsigned int -log_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct ipt_log_info *loginfo = par->targinfo; - struct nf_loginfo li; - - li.type = NF_LOG_TYPE_LOG; - li.u.log.level = loginfo->level; - li.u.log.logflags = loginfo->logflags; - - ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, - loginfo->prefix); - return XT_CONTINUE; -} - -static int log_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_log_info *loginfo = par->targinfo; - - if (loginfo->level >= 8) { - pr_debug("level %u >= 8\n", loginfo->level); - return -EINVAL; - } - if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - pr_debug("prefix is not null-terminated\n"); - return -EINVAL; - } - return 0; -} - -static struct xt_target log_tg_reg __read_mostly = { - .name = "LOG", - .family = NFPROTO_IPV4, - .target = log_tg, - .targetsize = sizeof(struct ipt_log_info), - .checkentry = log_tg_check, - .me = THIS_MODULE, -}; - -static struct nf_logger ipt_log_logger __read_mostly = { - .name = "ipt_LOG", - .logfn = &ipt_log_packet, - .me = THIS_MODULE, -}; - -static int __init log_tg_init(void) -{ - int ret; - - ret = xt_register_target(&log_tg_reg); - if (ret < 0) - return ret; - nf_log_register(NFPROTO_IPV4, &ipt_log_logger); - return 0; -} - -static void __exit log_tg_exit(void) -{ - nf_log_unregister(&ipt_log_logger); - xt_unregister_target(&log_tg_reg); -} - -module_init(log_tg_init); -module_exit(log_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 9a68fb5b9e77..d33cddd16fbb 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -154,15 +154,6 @@ config IP6_NF_TARGET_HL (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. -config IP6_NF_TARGET_LOG - tristate "LOG target support" - default m if NETFILTER_ADVANCED=n - help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2eaed96db02c..d4dfd0a21097 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -31,5 +31,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets -obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c deleted file mode 100644 index e6af8d72f26b..000000000000 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ /dev/null @@ -1,527 +0,0 @@ -/* - * This is a module which is used for logging packets. - */ - -/* (C) 2001 Jan Rekorajski - * (C) 2002-2004 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Jan Rekorajski "); -MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); -MODULE_LICENSE("GPL"); - -struct in_device; -#include -#include - -/* One level of recursion won't kill us */ -static void dump_packet(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb, unsigned int ip6hoff, - int recurse) -{ - u_int8_t currenthdr; - int fragment; - struct ipv6hdr _ip6h; - const struct ipv6hdr *ih; - unsigned int ptr; - unsigned int hdrlen = 0; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); - if (ih == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); - - /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ - sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", - ntohs(ih->payload_len) + sizeof(struct ipv6hdr), - (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, - ih->hop_limit, - (ntohl(*(__be32 *)ih) & 0x000fffff)); - - fragment = 0; - ptr = ip6hoff + sizeof(struct ipv6hdr); - currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { - struct ipv6_opt_hdr _hdr; - const struct ipv6_opt_hdr *hp; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - if (hp == NULL) { - sb_add(m, "TRUNCATED"); - return; - } - - /* Max length: 48 "OPT (...) " */ - if (logflags & IP6T_LOG_IPOPT) - sb_add(m, "OPT ( "); - - switch (currenthdr) { - case IPPROTO_FRAGMENT: { - struct frag_hdr _fhdr; - const struct frag_hdr *fh; - - sb_add(m, "FRAG:"); - fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), - &_fhdr); - if (fh == NULL) { - sb_add(m, "TRUNCATED "); - return; - } - - /* Max length: 6 "65535 " */ - sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); - - /* Max length: 11 "INCOMPLETE " */ - if (fh->frag_off & htons(0x0001)) - sb_add(m, "INCOMPLETE "); - - sb_add(m, "ID:%08x ", ntohl(fh->identification)); - - if (ntohs(fh->frag_off) & 0xFFF8) - fragment = 1; - - hdrlen = 8; - - break; - } - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - case IPPROTO_HOPOPTS: - if (fragment) { - if (logflags & IP6T_LOG_IPOPT) - sb_add(m, ")"); - return; - } - hdrlen = ipv6_optlen(hp); - break; - /* Max Length */ - case IPPROTO_AH: - if (logflags & IP6T_LOG_IPOPT) { - struct ip_auth_hdr _ahdr; - const struct ip_auth_hdr *ah; - - /* Max length: 3 "AH " */ - sb_add(m, "AH "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), - &_ahdr); - if (ah == NULL) { - /* - * Max length: 26 "INCOMPLETE [65535 - * bytes] )" - */ - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 15 "SPI=0xF1234567 */ - sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); - - } - - hdrlen = (hp->hdrlen+2)<<2; - break; - case IPPROTO_ESP: - if (logflags & IP6T_LOG_IPOPT) { - struct ip_esp_hdr _esph; - const struct ip_esp_hdr *eh; - - /* Max length: 4 "ESP " */ - sb_add(m, "ESP "); - - if (fragment) { - sb_add(m, ")"); - return; - } - - /* - * Max length: 26 "INCOMPLETE [65535 bytes] )" - */ - eh = skb_header_pointer(skb, ptr, sizeof(_esph), - &_esph); - if (eh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] )", - skb->len - ptr); - return; - } - - /* Length: 16 "SPI=0xF1234567 )" */ - sb_add(m, "SPI=0x%x )", ntohl(eh->spi) ); - - } - return; - default: - /* Max length: 20 "Unknown Ext Hdr 255" */ - sb_add(m, "Unknown Ext Hdr %u", currenthdr); - return; - } - if (logflags & IP6T_LOG_IPOPT) - sb_add(m, ") "); - - currenthdr = hp->nexthdr; - ptr += hdrlen; - } - - switch (currenthdr) { - case IPPROTO_TCP: { - struct tcphdr _tcph; - const struct tcphdr *th; - - /* Max length: 10 "PROTO=TCP " */ - sb_add(m, "PROTO=TCP "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph); - if (th == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u ", - ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & IP6T_LOG_TCPSEQ) - sb_add(m, "SEQ=%u ACK=%u ", - ntohl(th->seq), ntohl(th->ack_seq)); - /* Max length: 13 "WINDOW=65535 " */ - sb_add(m, "WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3C " */ - sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - sb_add(m, "CWR "); - if (th->ece) - sb_add(m, "ECE "); - if (th->urg) - sb_add(m, "URG "); - if (th->ack) - sb_add(m, "ACK "); - if (th->psh) - sb_add(m, "PSH "); - if (th->rst) - sb_add(m, "RST "); - if (th->syn) - sb_add(m, "SYN "); - if (th->fin) - sb_add(m, "FIN "); - /* Max length: 11 "URGP=65535 " */ - sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & IP6T_LOG_TCPOPT) && - th->doff * 4 > sizeof(struct tcphdr)) { - u_int8_t _opt[60 - sizeof(struct tcphdr)]; - const u_int8_t *op; - unsigned int i; - unsigned int optsize = th->doff * 4 - - sizeof(struct tcphdr); - - op = skb_header_pointer(skb, - ptr + sizeof(struct tcphdr), - optsize, _opt); - if (op == NULL) { - sb_add(m, "OPT (TRUNCATED)"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - sb_add(m, "OPT ("); - for (i =0; i < optsize; i++) - sb_add(m, "%02X", op[i]); - sb_add(m, ") "); - } - break; - } - case IPPROTO_UDP: - case IPPROTO_UDPLITE: { - struct udphdr _udph; - const struct udphdr *uh; - - if (currenthdr == IPPROTO_UDP) - /* Max length: 10 "PROTO=UDP " */ - sb_add(m, "PROTO=UDP " ); - else /* Max length: 14 "PROTO=UDPLITE " */ - sb_add(m, "PROTO=UDPLITE "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph); - if (uh == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - sb_add(m, "SPT=%u DPT=%u LEN=%u ", - ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - break; - } - case IPPROTO_ICMPV6: { - struct icmp6hdr _icmp6h; - const struct icmp6hdr *ic; - - /* Max length: 13 "PROTO=ICMPv6 " */ - sb_add(m, "PROTO=ICMPv6 "); - - if (fragment) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); - if (ic == NULL) { - sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); - return; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); - - switch (ic->icmp6_type) { - case ICMPV6_ECHO_REQUEST: - case ICMPV6_ECHO_REPLY: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - sb_add(m, "ID=%u SEQ=%u ", - ntohs(ic->icmp6_identifier), - ntohs(ic->icmp6_sequence)); - break; - case ICMPV6_MGM_QUERY: - case ICMPV6_MGM_REPORT: - case ICMPV6_MGM_REDUCTION: - break; - - case ICMPV6_PARAMPROB: - /* Max length: 17 "POINTER=ffffffff " */ - sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); - /* Fall through */ - case ICMPV6_DEST_UNREACH: - case ICMPV6_PKT_TOOBIG: - case ICMPV6_TIME_EXCEED: - /* Max length: 3+maxlen */ - if (recurse) { - sb_add(m, "["); - dump_packet(m, info, skb, - ptr + sizeof(_icmp6h), 0); - sb_add(m, "] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) - sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); - } - break; - } - /* Max length: 10 "PROTO=255 " */ - default: - sb_add(m, "PROTO=%u ", currenthdr); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - sb_add(m, "UID=%u GID=%u ", - skb->sk->sk_socket->file->f_cred->fsuid, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } - - /* Max length: 16 "MARK=0xFFFFFFFF " */ - if (!recurse && skb->mark) - sb_add(m, "MARK=0x%x ", skb->mark); -} - -static void dump_mac_header(struct sbuff *m, - const struct nf_loginfo *info, - const struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - unsigned int logflags = 0; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - - if (!(logflags & IP6T_LOG_MACDECODE)) - goto fallback; - - switch (dev->type) { - case ARPHRD_ETHER: - sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", - eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, - ntohs(eth_hdr(skb)->h_proto)); - return; - default: - break; - } - -fallback: - sb_add(m, "MAC="); - if (dev->hard_header_len && - skb->mac_header != skb->network_header) { - const unsigned char *p = skb_mac_header(skb); - unsigned int len = dev->hard_header_len; - unsigned int i; - - if (dev->type == ARPHRD_SIT && - (p -= ETH_HLEN) < skb->head) - p = NULL; - - if (p != NULL) { - sb_add(m, "%02x", *p++); - for (i = 1; i < len; i++) - sb_add(m, ":%02x", *p++); - } - sb_add(m, " "); - - if (dev->type == ARPHRD_SIT) { - const struct iphdr *iph = - (struct iphdr *)skb_mac_header(skb); - sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); - } - } else - sb_add(m, " "); -} - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 5, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void -ip6t_log_packet(u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - struct sbuff *m = sb_open(); - - if (!loginfo) - loginfo = &default_loginfo; - - sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, - in ? in->name : "", - out ? out->name : ""); - - if (in != NULL) - dump_mac_header(m, loginfo, skb); - - dump_packet(m, loginfo, skb, skb_network_offset(skb), 1); - - sb_close(m); -} - -static unsigned int -log_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct ip6t_log_info *loginfo = par->targinfo; - struct nf_loginfo li; - - li.type = NF_LOG_TYPE_LOG; - li.u.log.level = loginfo->level; - li.u.log.logflags = loginfo->logflags; - - ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, - &li, loginfo->prefix); - return XT_CONTINUE; -} - - -static int log_tg6_check(const struct xt_tgchk_param *par) -{ - const struct ip6t_log_info *loginfo = par->targinfo; - - if (loginfo->level >= 8) { - pr_debug("level %u >= 8\n", loginfo->level); - return -EINVAL; - } - if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - pr_debug("prefix not null-terminated\n"); - return -EINVAL; - } - return 0; -} - -static struct xt_target log_tg6_reg __read_mostly = { - .name = "LOG", - .family = NFPROTO_IPV6, - .target = log_tg6, - .targetsize = sizeof(struct ip6t_log_info), - .checkentry = log_tg6_check, - .me = THIS_MODULE, -}; - -static struct nf_logger ip6t_logger __read_mostly = { - .name = "ip6t_LOG", - .logfn = &ip6t_log_packet, - .me = THIS_MODULE, -}; - -static int __init log_tg6_init(void) -{ - int ret; - - ret = xt_register_target(&log_tg6_reg); - if (ret < 0) - return ret; - nf_log_register(NFPROTO_IPV6, &ip6t_logger); - return 0; -} - -static void __exit log_tg6_exit(void) -{ - nf_log_unregister(&ip6t_logger); - xt_unregister_target(&log_tg6_reg); -} - -module_init(log_tg6_init); -module_exit(log_tg6_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f8ac4ef0b794..b895d8b13215 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -524,6 +524,15 @@ config NETFILTER_XT_TARGET_LED For more information on the LEDs available on your system, see Documentation/leds/leds-class.txt +config NETFILTER_XT_TARGET_LOG + tristate "LOG target support" + default m if NETFILTER_ADVANCED=n + help + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 40f4c3d636c5..a28c2a6563f8 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o +obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c new file mode 100644 index 000000000000..1595608a892d --- /dev/null +++ b/net/netfilter/xt_LOG.c @@ -0,0 +1,921 @@ +/* + * This is a module which is used for logging packets. + */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 5, + .logflags = NF_LOG_MASK, + }, + }, +}; + +static int dump_udp_header(struct sbuff *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset) +{ + struct udphdr _udph; + const struct udphdr *uh; + + if (proto == IPPROTO_UDP) + /* Max length: 10 "PROTO=UDP " */ + sb_add(m, "PROTO=UDP "); + else /* Max length: 14 "PROTO=UDPLITE " */ + sb_add(m, "PROTO=UDPLITE "); + + if (fragment) + goto out; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + sb_add(m, "SPT=%u DPT=%u LEN=%u ", ntohs(uh->source), ntohs(uh->dest), + ntohs(uh->len)); + +out: + return 0; +} + +static int dump_tcp_header(struct sbuff *m, const struct sk_buff *skb, + u8 proto, int fragment, unsigned int offset, + unsigned int logflags) +{ + struct tcphdr _tcph; + const struct tcphdr *th; + + /* Max length: 10 "PROTO=TCP " */ + sb_add(m, "PROTO=TCP "); + + if (fragment) + return 0; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - offset); + return 1; + } + + /* Max length: 20 "SPT=65535 DPT=65535 " */ + sb_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); + /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ + if (logflags & XT_LOG_TCPSEQ) + sb_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); + + /* Max length: 13 "WINDOW=65535 " */ + sb_add(m, "WINDOW=%u ", ntohs(th->window)); + /* Max length: 9 "RES=0x3C " */ + sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & + TCP_RESERVED_BITS) >> 22)); + /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ + if (th->cwr) + sb_add(m, "CWR "); + if (th->ece) + sb_add(m, "ECE "); + if (th->urg) + sb_add(m, "URG "); + if (th->ack) + sb_add(m, "ACK "); + if (th->psh) + sb_add(m, "PSH "); + if (th->rst) + sb_add(m, "RST "); + if (th->syn) + sb_add(m, "SYN "); + if (th->fin) + sb_add(m, "FIN "); + /* Max length: 11 "URGP=65535 " */ + sb_add(m, "URGP=%u ", ntohs(th->urg_ptr)); + + if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + u_int8_t _opt[60 - sizeof(struct tcphdr)]; + const u_int8_t *op; + unsigned int i; + unsigned int optsize = th->doff*4 - sizeof(struct tcphdr); + + op = skb_header_pointer(skb, offset + sizeof(struct tcphdr), + optsize, _opt); + if (op == NULL) { + sb_add(m, "OPT (TRUNCATED)"); + return 1; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + sb_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + sb_add(m, "%02X", op[i]); + + sb_add(m, ") "); + } + + return 0; +} + +/* One level of recursion won't kill us */ +static void dump_ipv4_packet(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, + unsigned int iphoff) +{ + struct iphdr _iph; + const struct iphdr *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); + if (ih == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Important fields: + * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ + /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ + sb_add(m, "SRC=%pI4 DST=%pI4 ", + &ih->saddr, &ih->daddr); + + /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ + sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", + ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, + ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); + + /* Max length: 6 "CE DF MF " */ + if (ntohs(ih->frag_off) & IP_CE) + sb_add(m, "CE "); + if (ntohs(ih->frag_off) & IP_DF) + sb_add(m, "DF "); + if (ntohs(ih->frag_off) & IP_MF) + sb_add(m, "MF "); + + /* Max length: 11 "FRAG:65535 " */ + if (ntohs(ih->frag_off) & IP_OFFSET) + sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); + + if ((logflags & XT_LOG_IPOPT) && + ih->ihl * 4 > sizeof(struct iphdr)) { + const unsigned char *op; + unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; + unsigned int i, optsize; + + optsize = ih->ihl * 4 - sizeof(struct iphdr); + op = skb_header_pointer(skb, iphoff+sizeof(_iph), + optsize, _opt); + if (op == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Max length: 127 "OPT (" 15*4*2chars ") " */ + sb_add(m, "OPT ("); + for (i = 0; i < optsize; i++) + sb_add(m, "%02X", op[i]); + sb_add(m, ") "); + } + + switch (ih->protocol) { + case IPPROTO_TCP: + if (dump_tcp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4, logflags)) + return; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (dump_udp_header(m, skb, ih->protocol, + ntohs(ih->frag_off) & IP_OFFSET, + iphoff+ih->ihl*4)) + return; + case IPPROTO_ICMP: { + struct icmphdr _icmph; + const struct icmphdr *ich; + static const size_t required_len[NR_ICMP_TYPES+1] + = { [ICMP_ECHOREPLY] = 4, + [ICMP_DEST_UNREACH] + = 8 + sizeof(struct iphdr), + [ICMP_SOURCE_QUENCH] + = 8 + sizeof(struct iphdr), + [ICMP_REDIRECT] + = 8 + sizeof(struct iphdr), + [ICMP_ECHO] = 4, + [ICMP_TIME_EXCEEDED] + = 8 + sizeof(struct iphdr), + [ICMP_PARAMETERPROB] + = 8 + sizeof(struct iphdr), + [ICMP_TIMESTAMP] = 20, + [ICMP_TIMESTAMPREPLY] = 20, + [ICMP_ADDRESS] = 12, + [ICMP_ADDRESSREPLY] = 12 }; + + /* Max length: 11 "PROTO=ICMP " */ + sb_add(m, "PROTO=ICMP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, + sizeof(_icmph), &_icmph); + if (ich == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + if (ich->type <= NR_ICMP_TYPES && + required_len[ich->type] && + skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + switch (ich->type) { + case ICMP_ECHOREPLY: + case ICMP_ECHO: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + sb_add(m, "ID=%u SEQ=%u ", + ntohs(ich->un.echo.id), + ntohs(ich->un.echo.sequence)); + break; + + case ICMP_PARAMETERPROB: + /* Max length: 14 "PARAMETER=255 " */ + sb_add(m, "PARAMETER=%u ", + ntohl(ich->un.gateway) >> 24); + break; + case ICMP_REDIRECT: + /* Max length: 24 "GATEWAY=255.255.255.255 " */ + sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway); + /* Fall through */ + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_TIME_EXCEEDED: + /* Max length: 3+maxlen */ + if (!iphoff) { /* Only recurse once. */ + sb_add(m, "["); + dump_ipv4_packet(m, info, skb, + iphoff + ih->ihl*4+sizeof(_icmph)); + sb_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ich->type == ICMP_DEST_UNREACH && + ich->code == ICMP_FRAG_NEEDED) + sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu)); + } + break; + } + /* Max Length */ + case IPPROTO_AH: { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 9 "PROTO=AH " */ + sb_add(m, "PROTO=AH "); + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ah = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_ahdr), &_ahdr); + if (ah == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); + break; + } + case IPPROTO_ESP: { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 10 "PROTO=ESP " */ + sb_add(m, "PROTO=ESP "); + + if (ntohs(ih->frag_off) & IP_OFFSET) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + eh = skb_header_pointer(skb, iphoff+ih->ihl*4, + sizeof(_esph), &_esph); + if (eh == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", + skb->len - iphoff - ih->ihl*4); + break; + } + + /* Length: 15 "SPI=0xF1234567 " */ + sb_add(m, "SPI=0x%x ", ntohl(eh->spi)); + break; + } + /* Max length: 10 "PROTO 255 " */ + default: + sb_add(m, "PROTO=%u ", ih->protocol); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && !iphoff && skb->sk) { + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket && skb->sk->sk_socket->file) + sb_add(m, "UID=%u GID=%u ", + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + } + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!iphoff && skb->mark) + sb_add(m, "MARK=0x%x ", skb->mark); + + /* Proto Max log string length */ + /* IP: 40+46+6+11+127 = 230 */ + /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ + /* UDP: 10+max(25,20) = 35 */ + /* UDPLITE: 14+max(25,20) = 39 */ + /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ + /* ESP: 10+max(25)+15 = 50 */ + /* AH: 9+max(25)+15 = 49 */ + /* unknown: 10 */ + + /* (ICMP allows recursion one level deep) */ + /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ + /* maxlen = 230+ 91 + 230 + 252 = 803 */ +} + +static void dump_ipv4_mac_header(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + sb_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int i; + + sb_add(m, "%02x", *p++); + for (i = 1; i < dev->hard_header_len; i++, p++) + sb_add(m, ":%02x", *p); + } + sb_add(m, " "); +} + +static void +log_packet_common(struct sbuff *m, + u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, + in ? in->name : "", + out ? out->name : ""); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + const struct net_device *physindev; + const struct net_device *physoutdev; + + physindev = skb->nf_bridge->physindev; + if (physindev && in != physindev) + sb_add(m, "PHYSIN=%s ", physindev->name); + physoutdev = skb->nf_bridge->physoutdev; + if (physoutdev && out != physoutdev) + sb_add(m, "PHYSOUT=%s ", physoutdev->name); + } +#endif +} + + +static void +ipt_log_packet(u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct sbuff *m = sb_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); + + if (in != NULL) + dump_ipv4_mac_header(m, loginfo, skb); + + dump_ipv4_packet(m, loginfo, skb, 0); + + sb_close(m); +} + +#if IS_ENABLED(CONFIG_IPV6) +/* One level of recursion won't kill us */ +static void dump_ipv6_packet(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb, unsigned int ip6hoff, + int recurse) +{ + u_int8_t currenthdr; + int fragment; + struct ipv6hdr _ip6h; + const struct ipv6hdr *ih; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; + + ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); + if (ih == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ + sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); + + /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ + sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", + ntohs(ih->payload_len) + sizeof(struct ipv6hdr), + (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, + ih->hop_limit, + (ntohl(*(__be32 *)ih) & 0x000fffff)); + + fragment = 0; + ptr = ip6hoff + sizeof(struct ipv6hdr); + currenthdr = ih->nexthdr; + while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { + struct ipv6_opt_hdr _hdr; + const struct ipv6_opt_hdr *hp; + + hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); + if (hp == NULL) { + sb_add(m, "TRUNCATED"); + return; + } + + /* Max length: 48 "OPT (...) " */ + if (logflags & XT_LOG_IPOPT) + sb_add(m, "OPT ( "); + + switch (currenthdr) { + case IPPROTO_FRAGMENT: { + struct frag_hdr _fhdr; + const struct frag_hdr *fh; + + sb_add(m, "FRAG:"); + fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), + &_fhdr); + if (fh == NULL) { + sb_add(m, "TRUNCATED "); + return; + } + + /* Max length: 6 "65535 " */ + sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8); + + /* Max length: 11 "INCOMPLETE " */ + if (fh->frag_off & htons(0x0001)) + sb_add(m, "INCOMPLETE "); + + sb_add(m, "ID:%08x ", ntohl(fh->identification)); + + if (ntohs(fh->frag_off) & 0xFFF8) + fragment = 1; + + hdrlen = 8; + + break; + } + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + case IPPROTO_HOPOPTS: + if (fragment) { + if (logflags & XT_LOG_IPOPT) + sb_add(m, ")"); + return; + } + hdrlen = ipv6_optlen(hp); + break; + /* Max Length */ + case IPPROTO_AH: + if (logflags & XT_LOG_IPOPT) { + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + + /* Max length: 3 "AH " */ + sb_add(m, "AH "); + + if (fragment) { + sb_add(m, ")"); + return; + } + + ah = skb_header_pointer(skb, ptr, sizeof(_ahdr), + &_ahdr); + if (ah == NULL) { + /* + * Max length: 26 "INCOMPLETE [65535 + * bytes] )" + */ + sb_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 15 "SPI=0xF1234567 */ + sb_add(m, "SPI=0x%x ", ntohl(ah->spi)); + + } + + hdrlen = (hp->hdrlen+2)<<2; + break; + case IPPROTO_ESP: + if (logflags & XT_LOG_IPOPT) { + struct ip_esp_hdr _esph; + const struct ip_esp_hdr *eh; + + /* Max length: 4 "ESP " */ + sb_add(m, "ESP "); + + if (fragment) { + sb_add(m, ")"); + return; + } + + /* + * Max length: 26 "INCOMPLETE [65535 bytes] )" + */ + eh = skb_header_pointer(skb, ptr, sizeof(_esph), + &_esph); + if (eh == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] )", + skb->len - ptr); + return; + } + + /* Length: 16 "SPI=0xF1234567 )" */ + sb_add(m, "SPI=0x%x )", ntohl(eh->spi)); + + } + return; + default: + /* Max length: 20 "Unknown Ext Hdr 255" */ + sb_add(m, "Unknown Ext Hdr %u", currenthdr); + return; + } + if (logflags & XT_LOG_IPOPT) + sb_add(m, ") "); + + currenthdr = hp->nexthdr; + ptr += hdrlen; + } + + switch (currenthdr) { + case IPPROTO_TCP: + if (dump_tcp_header(m, skb, currenthdr, fragment, ptr, + logflags)) + return; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + if (dump_udp_header(m, skb, currenthdr, fragment, ptr)) + return; + case IPPROTO_ICMPV6: { + struct icmp6hdr _icmp6h; + const struct icmp6hdr *ic; + + /* Max length: 13 "PROTO=ICMPv6 " */ + sb_add(m, "PROTO=ICMPv6 "); + + if (fragment) + break; + + /* Max length: 25 "INCOMPLETE [65535 bytes] " */ + ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); + if (ic == NULL) { + sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr); + return; + } + + /* Max length: 18 "TYPE=255 CODE=255 " */ + sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); + + switch (ic->icmp6_type) { + case ICMPV6_ECHO_REQUEST: + case ICMPV6_ECHO_REPLY: + /* Max length: 19 "ID=65535 SEQ=65535 " */ + sb_add(m, "ID=%u SEQ=%u ", + ntohs(ic->icmp6_identifier), + ntohs(ic->icmp6_sequence)); + break; + case ICMPV6_MGM_QUERY: + case ICMPV6_MGM_REPORT: + case ICMPV6_MGM_REDUCTION: + break; + + case ICMPV6_PARAMPROB: + /* Max length: 17 "POINTER=ffffffff " */ + sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer)); + /* Fall through */ + case ICMPV6_DEST_UNREACH: + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + /* Max length: 3+maxlen */ + if (recurse) { + sb_add(m, "["); + dump_ipv6_packet(m, info, skb, + ptr + sizeof(_icmp6h), 0); + sb_add(m, "] "); + } + + /* Max length: 10 "MTU=65535 " */ + if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) + sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu)); + } + break; + } + /* Max length: 10 "PROTO=255 " */ + default: + sb_add(m, "PROTO=%u ", currenthdr); + } + + /* Max length: 15 "UID=4294967295 " */ + if ((logflags & XT_LOG_UID) && recurse && skb->sk) { + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket && skb->sk->sk_socket->file) + sb_add(m, "UID=%u GID=%u ", + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&skb->sk->sk_callback_lock); + } + + /* Max length: 16 "MARK=0xFFFFFFFF " */ + if (!recurse && skb->mark) + sb_add(m, "MARK=0x%x ", skb->mark); +} + +static void dump_ipv6_mac_header(struct sbuff *m, + const struct nf_loginfo *info, + const struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + unsigned int logflags = 0; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + + if (!(logflags & XT_LOG_MACDECODE)) + goto fallback; + + switch (dev->type) { + case ARPHRD_ETHER: + sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ", + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + ntohs(eth_hdr(skb)->h_proto)); + return; + default: + break; + } + +fallback: + sb_add(m, "MAC="); + if (dev->hard_header_len && + skb->mac_header != skb->network_header) { + const unsigned char *p = skb_mac_header(skb); + unsigned int len = dev->hard_header_len; + unsigned int i; + + if (dev->type == ARPHRD_SIT) { + p -= ETH_HLEN; + + if (p < skb->head) + p = NULL; + } + + if (p != NULL) { + sb_add(m, "%02x", *p++); + for (i = 1; i < len; i++) + sb_add(m, ":%02x", *p++); + } + sb_add(m, " "); + + if (dev->type == ARPHRD_SIT) { + const struct iphdr *iph = + (struct iphdr *)skb_mac_header(skb); + sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, + &iph->daddr); + } + } else + sb_add(m, " "); +} + +static void +ip6t_log_packet(u_int8_t pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) +{ + struct sbuff *m = sb_open(); + + if (!loginfo) + loginfo = &default_loginfo; + + log_packet_common(m, pf, hooknum, skb, in, out, loginfo, prefix); + + if (in != NULL) + dump_ipv6_mac_header(m, loginfo, skb); + + dump_ipv6_packet(m, loginfo, skb, skb_network_offset(skb), 1); + + sb_close(m); +} +#endif + +static unsigned int +log_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_log_info *loginfo = par->targinfo; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; + + if (par->family == NFPROTO_IPV4) + ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, + par->out, &li, loginfo->prefix); +#if IS_ENABLED(CONFIG_IPV6) + else if (par->family == NFPROTO_IPV6) + ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, + par->out, &li, loginfo->prefix); +#endif + else + WARN_ON_ONCE(1); + + return XT_CONTINUE; +} + +static int log_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_log_info *loginfo = par->targinfo; + + if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6) + return -EINVAL; + + if (loginfo->level >= 8) { + pr_debug("level %u >= 8\n", loginfo->level); + return -EINVAL; + } + + if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { + pr_debug("prefix is not null-terminated\n"); + return -EINVAL; + } + + return 0; +} + +static struct xt_target log_tg_regs[] __read_mostly = { + { + .name = "LOG", + .family = NFPROTO_IPV4, + .target = log_tg, + .targetsize = sizeof(struct xt_log_info), + .checkentry = log_tg_check, + .me = THIS_MODULE, + }, +#if IS_ENABLED(CONFIG_IPV6) + { + .name = "LOG", + .family = NFPROTO_IPV6, + .target = log_tg, + .targetsize = sizeof(struct xt_log_info), + .checkentry = log_tg_check, + .me = THIS_MODULE, + }, +#endif +}; + +static struct nf_logger ipt_log_logger __read_mostly = { + .name = "ipt_LOG", + .logfn = &ipt_log_packet, + .me = THIS_MODULE, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static struct nf_logger ip6t_log_logger __read_mostly = { + .name = "ip6t_LOG", + .logfn = &ip6t_log_packet, + .me = THIS_MODULE, +}; +#endif + +static int __init log_tg_init(void) +{ + int ret; + + ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); + if (ret < 0) + return ret; + + nf_log_register(NFPROTO_IPV4, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IPV6) + nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); +#endif + return 0; +} + +static void __exit log_tg_exit(void) +{ + nf_log_unregister(&ipt_log_logger); +#if IS_ENABLED(CONFIG_IPV6) + nf_log_unregister(&ip6t_log_logger); +#endif + xt_unregister_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); +} + +module_init(log_tg_init); +module_exit(log_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_AUTHOR("Jan Rekorajski "); +MODULE_DESCRIPTION("Xtables: IPv4/IPv6 packet logging"); +MODULE_ALIAS("ipt_LOG"); +MODULE_ALIAS("ip6t_LOG"); -- cgit v1.2.3 From 2c8503f55fbdfbeff4164f133df804cf4d316290 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2012 18:23:31 +0100 Subject: netfilter: nf_conntrack: pass timeout array to l4->new and l4->packet This patch defines a new interface for l4 protocol trackers: unsigned int *(*get_timeouts)(struct net *net); that is used to return the array of unsigned int that contains the timeouts that will be applied for this flow. This is passed to the l4proto->new(...) and l4proto->packet(...) functions to specify the timeout policy. This interface allows per-net global timeout configuration (although only DCCP supports this by now) and it will allow custom custom timeout configuration by means of follow-up patches. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 8 +++++-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 13 +++++++++--- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 13 +++++++++--- net/netfilter/nf_conntrack_core.c | 18 ++++++++++------ net/netfilter/nf_conntrack_proto_dccp.c | 16 +++++++++----- net/netfilter/nf_conntrack_proto_generic.c | 29 ++++++++++++++++---------- net/netfilter/nf_conntrack_proto_gre.c | 15 +++++++++---- net/netfilter/nf_conntrack_proto_sctp.c | 14 ++++++++++--- net/netfilter/nf_conntrack_proto_tcp.c | 22 ++++++++++++------- net/netfilter/nf_conntrack_proto_udp.c | 16 ++++++++++---- net/netfilter/nf_conntrack_proto_udplite.c | 16 ++++++++++---- 11 files changed, 128 insertions(+), 52 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index e3d3ee3c06a2..c48b67405aa0 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -39,12 +39,13 @@ struct nf_conntrack_l4proto { unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum); + unsigned int hooknum, + unsigned int *timeouts); /* Called when a new connection for this protocol found; * returns TRUE if it's OK. If so, packet() called next. */ bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff); + unsigned int dataoff, unsigned int *timeouts); /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); @@ -60,6 +61,9 @@ struct nf_conntrack_l4proto { /* Print out the private part of the conntrack. */ int (*print_conntrack)(struct seq_file *s, struct nf_conn *); + /* Return the array of timeouts for this protocol. */ + unsigned int *(*get_timeouts)(struct net *net); + /* convert protoinfo to nfnetink attributes */ int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, struct nf_conn *ct); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index ab5b27a2916f..6b801124b31f 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -75,25 +75,31 @@ static int icmp_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } +static unsigned int *icmp_get_timeouts(struct net *net) +{ + return &nf_ct_icmp_timeout; +} + /* Returns verdict for packet, or -1 for invalid. */ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeout) { /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic and also to handle correctly ICMP echo reply duplicates. */ - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { static const u_int8_t valid_new[] = { [ICMP_ECHO] = 1, @@ -298,6 +304,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .invert_tuple = icmp_invert_tuple, .print_tuple = icmp_print_tuple, .packet = icmp_packet, + .get_timeouts = icmp_get_timeouts, .new = icmp_new, .error = icmp_error, .destroy = NULL, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 7c05e7eacbc6..2eb9751eb7a8 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -88,25 +88,31 @@ static int icmpv6_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } +static unsigned int *icmpv6_get_timeouts(struct net *net) +{ + return &nf_ct_icmpv6_timeout; +} + /* Returns verdict for packet, or -1 for invalid. */ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeout) { /* Do not immediately delete the connection after the first successful reply to avoid excessive conntrackd traffic and also to handle correctly ICMP echo reply duplicates. */ - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { static const u_int8_t valid_new[] = { [ICMPV6_ECHO_REQUEST - 128] = 1, @@ -293,6 +299,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .invert_tuple = icmpv6_invert_tuple, .print_tuple = icmpv6_print_tuple, .packet = icmpv6_packet, + .get_timeouts = icmpv6_get_timeouts, .new = icmpv6_new, .error = icmpv6_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ed86a3be678e..d18995eea1c6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -763,7 +763,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, - unsigned int dataoff, u32 hash) + unsigned int dataoff, u32 hash, + unsigned int *timeouts) { struct nf_conn *ct; struct nf_conn_help *help; @@ -782,7 +783,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - if (!l4proto->new(ct, skb, dataoff)) { + if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); pr_debug("init conntrack: can't track with proto module\n"); return NULL; @@ -848,7 +849,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, int *set_reply, - enum ip_conntrack_info *ctinfo) + enum ip_conntrack_info *ctinfo, + unsigned int *timeouts) { struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; @@ -868,7 +870,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, - skb, dataoff, hash); + skb, dataoff, hash, timeouts); if (!h) return NULL; if (IS_ERR(h)) @@ -909,6 +911,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; + unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; int set_reply = 0; @@ -955,8 +958,11 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, goto out; } + timeouts = l4proto->get_timeouts(net); + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, - l3proto, l4proto, &set_reply, &ctinfo); + l3proto, l4proto, &set_reply, &ctinfo, + timeouts); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); @@ -973,7 +979,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); - ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index d6dde6dc09e6..8ea33598a0a7 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -423,7 +423,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, } static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct dccp_net *dn; @@ -472,12 +472,17 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh) ntohl(dhack->dccph_ack_nr_low); } +static unsigned int *dccp_get_timeouts(struct net *net) +{ + return dccp_pernet(net)->dccp_timeout; +} + static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int hooknum) + u_int8_t pf, unsigned int hooknum, + unsigned int *timeouts) { struct net *net = nf_ct_net(ct); - struct dccp_net *dn; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -559,8 +564,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); - dn = dccp_pernet(net); - nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); return NF_ACCEPT; } @@ -767,6 +771,7 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .invert_tuple = dccp_invert_tuple, .new = dccp_new, .packet = dccp_packet, + .get_timeouts = dccp_get_timeouts, .error = dccp_error, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, @@ -789,6 +794,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .invert_tuple = dccp_invert_tuple, .new = dccp_new, .packet = dccp_packet, + .get_timeouts = dccp_get_timeouts, .error = dccp_error, .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e2091d0c7a2f..0e6c5451db80 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -40,21 +40,27 @@ static int generic_print_tuple(struct seq_file *s, return 0; } +static unsigned int *generic_get_timeouts(struct net *net) +{ + return &nf_ct_generic_timeout; +} + /* Returns verdict for packet, or -1 for invalid. */ -static int packet(struct nf_conn *ct, - const struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum) +static int generic_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + u_int8_t pf, + unsigned int hooknum, + unsigned int *timeout) { - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout); + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static bool new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) +static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff, unsigned int *timeouts) { return true; } @@ -93,8 +99,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, - .packet = packet, - .new = new, + .packet = generic_packet, + .get_timeouts = generic_get_timeouts, + .new = generic_new, #ifdef CONFIG_SYSCTL .ctl_table_header = &generic_sysctl_header, .ctl_table = generic_sysctl_table, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 8144f22d159a..1bf01c95658b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -235,13 +235,19 @@ static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) (ct->proto.gre.stream_timeout / HZ)); } +static unsigned int *gre_get_timeouts(struct net *net) +{ + return gre_timeouts; +} + /* Returns verdict for packet, and may modify conntrack */ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { /* If we've seen traffic both ways, this is a GRE connection. * Extend timeout. */ @@ -260,15 +266,15 @@ static int gre_packet(struct nf_conn *ct, /* Called when a new connection for this protocol found. */ static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { pr_debug(": "); nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); /* initialize to sane value. Ideally a conntrack helper * (e.g. in case of pptp) is increasing them */ - ct->proto.gre.stream_timeout = gre_timeouts[GRE_CT_REPLIED]; - ct->proto.gre.timeout = gre_timeouts[GRE_CT_UNREPLIED]; + ct->proto.gre.stream_timeout = timeouts[GRE_CT_REPLIED]; + ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED]; return true; } @@ -295,6 +301,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, .print_conntrack = gre_print_conntrack, + .get_timeouts = gre_get_timeouts, .packet = gre_packet, .new = gre_new, .destroy = gre_destroy, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index afa69136061a..2a0703371e24 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -279,13 +279,19 @@ static int sctp_new_state(enum ip_conntrack_dir dir, return sctp_conntracks[dir][i][cur_state]; } +static unsigned int *sctp_get_timeouts(struct net *net) +{ + return sctp_timeouts; +} + /* Returns verdict for packet, or -NF_ACCEPT for invalid. */ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { enum sctp_conntrack new_state, old_state; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -370,7 +376,7 @@ static int sctp_packet(struct nf_conn *ct, } spin_unlock_bh(&ct->lock); - nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); + nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && dir == IP_CT_DIR_REPLY && @@ -390,7 +396,7 @@ out: /* Called when a new connection for this protocol found. */ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { enum sctp_conntrack new_state; const struct sctphdr *sh; @@ -664,6 +670,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .print_tuple = sctp_print_tuple, .print_conntrack = sctp_print_conntrack, .packet = sctp_packet, + .get_timeouts = sctp_get_timeouts, .new = sctp_new, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -694,6 +701,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .print_tuple = sctp_print_tuple, .print_conntrack = sctp_print_conntrack, .packet = sctp_packet, + .get_timeouts = sctp_get_timeouts, .new = sctp_new, .me = THIS_MODULE, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 57c778546094..8372bb43feb0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -813,13 +813,19 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +static unsigned int *tcp_get_timeouts(struct net *net) +{ + return tcp_timeouts; +} + /* Returns verdict for packet, or -1 for invalid. */ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; @@ -1014,14 +1020,14 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && - tcp_timeouts[new_state] > tcp_timeouts[TCP_CONNTRACK_RETRANS]) - timeout = tcp_timeouts[TCP_CONNTRACK_RETRANS]; + timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) + timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && - tcp_timeouts[new_state] > tcp_timeouts[TCP_CONNTRACK_UNACK]) - timeout = tcp_timeouts[TCP_CONNTRACK_UNACK]; + timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) + timeout = timeouts[TCP_CONNTRACK_UNACK]; else - timeout = tcp_timeouts[new_state]; + timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); if (new_state != old_state) @@ -1053,7 +1059,7 @@ static int tcp_packet(struct nf_conn *ct, /* Called when a new connection for this protocol found. */ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { enum tcp_conntrack new_state; const struct tcphdr *th; @@ -1444,6 +1450,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, + .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -1476,6 +1483,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, + .get_timeouts = tcp_get_timeouts, .new = tcp_new, .error = tcp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 5b24ff882f95..70e005992d5b 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -71,32 +71,38 @@ static int udp_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.udp.port)); } +static unsigned int *udp_get_timeouts(struct net *net) +{ + return udp_timeouts; +} + /* Returns verdict for packet, and may modify conntracktype */ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_refresh_acct(ct, ctinfo, skb, - udp_timeouts[UDP_CT_REPLIED]); + timeouts[UDP_CT_REPLIED]); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, - udp_timeouts[UDP_CT_UNREPLIED]); + timeouts[UDP_CT_UNREPLIED]); } return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { return true; } @@ -196,6 +202,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, .packet = udp_packet, + .get_timeouts = udp_get_timeouts, .new = udp_new, .error = udp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -224,6 +231,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .invert_tuple = udp_invert_tuple, .print_tuple = udp_print_tuple, .packet = udp_packet, + .get_timeouts = udp_get_timeouts, .new = udp_new, .error = udp_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index e73071743e01..0b32ccb1d515 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -68,32 +68,38 @@ static int udplite_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.udp.port)); } +static unsigned int *udplite_get_timeouts(struct net *net) +{ + return udplite_timeouts; +} + /* Returns verdict for packet, and may modify conntracktype */ static int udplite_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, - unsigned int hooknum) + unsigned int hooknum, + unsigned int *timeouts) { /* If we've seen traffic both ways, this is some kind of UDP stream. Extend timeout. */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { nf_ct_refresh_acct(ct, ctinfo, skb, - udplite_timeouts[UDPLITE_CT_REPLIED]); + timeouts[UDPLITE_CT_REPLIED]); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, - udplite_timeouts[UDPLITE_CT_UNREPLIED]); + timeouts[UDPLITE_CT_UNREPLIED]); } return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, - unsigned int dataoff) + unsigned int dataoff, unsigned int *timeouts) { return true; } @@ -181,6 +187,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, .packet = udplite_packet, + .get_timeouts = udplite_get_timeouts, .new = udplite_new, .error = udplite_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -205,6 +212,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .invert_tuple = udplite_invert_tuple, .print_tuple = udplite_print_tuple, .packet = udplite_packet, + .get_timeouts = udplite_get_timeouts, .new = udplite_new, .error = udplite_error, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) -- cgit v1.2.3 From 50978462300f74dc48aea4a38471cb69bdf741a5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Feb 2012 19:13:48 +0100 Subject: netfilter: add cttimeout infrastructure for fine timeout tuning This patch adds the infrastructure to add fine timeout tuning over nfnetlink. Now you can use the NFNL_SUBSYS_CTNETLINK_TIMEOUT subsystem to create/delete/dump timeout objects that contain some specific timeout policy for one flow. The follow up patches will allow you attach timeout policy object to conntrack via the CT target and the conntrack extension infrastructure. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_cttimeout.h | 114 +++++++ include/net/netfilter/nf_conntrack_l4proto.h | 11 + net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 47 +++ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 47 +++ net/netfilter/Kconfig | 11 + net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 70 +++++ net/netfilter/nf_conntrack_proto_generic.c | 48 +++ net/netfilter/nf_conntrack_proto_gre.c | 55 ++++ net/netfilter/nf_conntrack_proto_sctp.c | 69 +++++ net/netfilter/nf_conntrack_proto_tcp.c | 127 ++++++++ net/netfilter/nf_conntrack_proto_udp.c | 64 ++++ net/netfilter/nf_conntrack_proto_udplite.c | 66 ++++ net/netfilter/nfnetlink_cttimeout.c | 398 +++++++++++++++++++++++++ 16 files changed, 1131 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter/nfnetlink_cttimeout.h create mode 100644 net/netfilter/nfnetlink_cttimeout.c (limited to 'net/ipv6') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index aaa72aaa21de..1697036336b6 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,7 @@ header-y += nfnetlink.h header-y += nfnetlink_acct.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h +header-y += nfnetlink_cttimeout.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h header-y += x_tables.h diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index b64454c2f79f..6fd1f0d07e64 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -49,7 +49,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_OSF 5 #define NFNL_SUBSYS_IPSET 6 #define NFNL_SUBSYS_ACCT 7 -#define NFNL_SUBSYS_COUNT 8 +#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 +#define NFNL_SUBSYS_COUNT 9 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink_cttimeout.h b/include/linux/netfilter/nfnetlink_cttimeout.h new file mode 100644 index 000000000000..a2810a7c5e30 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_cttimeout.h @@ -0,0 +1,114 @@ +#ifndef _CTTIMEOUT_NETLINK_H +#define _CTTIMEOUT_NETLINK_H +#include + +enum ctnl_timeout_msg_types { + IPCTNL_MSG_TIMEOUT_NEW, + IPCTNL_MSG_TIMEOUT_GET, + IPCTNL_MSG_TIMEOUT_DELETE, + + IPCTNL_MSG_TIMEOUT_MAX +}; + +enum ctattr_timeout { + CTA_TIMEOUT_UNSPEC, + CTA_TIMEOUT_NAME, + CTA_TIMEOUT_L3PROTO, + CTA_TIMEOUT_L4PROTO, + CTA_TIMEOUT_DATA, + CTA_TIMEOUT_USE, + __CTA_TIMEOUT_MAX +}; +#define CTA_TIMEOUT_MAX (__CTA_TIMEOUT_MAX - 1) + +enum ctattr_timeout_generic { + CTA_TIMEOUT_GENERIC_UNSPEC, + CTA_TIMEOUT_GENERIC_TIMEOUT, + __CTA_TIMEOUT_GENERIC_MAX +}; +#define CTA_TIMEOUT_GENERIC_MAX (__CTA_TIMEOUT_GENERIC_MAX - 1) + +enum ctattr_timeout_tcp { + CTA_TIMEOUT_TCP_UNSPEC, + CTA_TIMEOUT_TCP_SYN_SENT, + CTA_TIMEOUT_TCP_SYN_RECV, + CTA_TIMEOUT_TCP_ESTABLISHED, + CTA_TIMEOUT_TCP_FIN_WAIT, + CTA_TIMEOUT_TCP_CLOSE_WAIT, + CTA_TIMEOUT_TCP_LAST_ACK, + CTA_TIMEOUT_TCP_TIME_WAIT, + CTA_TIMEOUT_TCP_CLOSE, + CTA_TIMEOUT_TCP_SYN_SENT2, + CTA_TIMEOUT_TCP_RETRANS, + CTA_TIMEOUT_TCP_UNACK, + __CTA_TIMEOUT_TCP_MAX +}; +#define CTA_TIMEOUT_TCP_MAX (__CTA_TIMEOUT_TCP_MAX - 1) + +enum ctattr_timeout_udp { + CTA_TIMEOUT_UDP_UNSPEC, + CTA_TIMEOUT_UDP_UNREPLIED, + CTA_TIMEOUT_UDP_REPLIED, + __CTA_TIMEOUT_UDP_MAX +}; +#define CTA_TIMEOUT_UDP_MAX (__CTA_TIMEOUT_UDP_MAX - 1) + +enum ctattr_timeout_udplite { + CTA_TIMEOUT_UDPLITE_UNSPEC, + CTA_TIMEOUT_UDPLITE_UNREPLIED, + CTA_TIMEOUT_UDPLITE_REPLIED, + __CTA_TIMEOUT_UDPLITE_MAX +}; +#define CTA_TIMEOUT_UDPLITE_MAX (__CTA_TIMEOUT_UDPLITE_MAX - 1) + +enum ctattr_timeout_icmp { + CTA_TIMEOUT_ICMP_UNSPEC, + CTA_TIMEOUT_ICMP_TIMEOUT, + __CTA_TIMEOUT_ICMP_MAX +}; +#define CTA_TIMEOUT_ICMP_MAX (__CTA_TIMEOUT_ICMP_MAX - 1) + +enum ctattr_timeout_dccp { + CTA_TIMEOUT_DCCP_UNSPEC, + CTA_TIMEOUT_DCCP_REQUEST, + CTA_TIMEOUT_DCCP_RESPOND, + CTA_TIMEOUT_DCCP_PARTOPEN, + CTA_TIMEOUT_DCCP_OPEN, + CTA_TIMEOUT_DCCP_CLOSEREQ, + CTA_TIMEOUT_DCCP_CLOSING, + CTA_TIMEOUT_DCCP_TIMEWAIT, + __CTA_TIMEOUT_DCCP_MAX +}; +#define CTA_TIMEOUT_DCCP_MAX (__CTA_TIMEOUT_DCCP_MAX - 1) + +enum ctattr_timeout_sctp { + CTA_TIMEOUT_SCTP_UNSPEC, + CTA_TIMEOUT_SCTP_CLOSED, + CTA_TIMEOUT_SCTP_COOKIE_WAIT, + CTA_TIMEOUT_SCTP_COOKIE_ECHOED, + CTA_TIMEOUT_SCTP_ESTABLISHED, + CTA_TIMEOUT_SCTP_SHUTDOWN_SENT, + CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, + CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, + __CTA_TIMEOUT_SCTP_MAX +}; +#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) + +enum ctattr_timeout_icmpv6 { + CTA_TIMEOUT_ICMPV6_UNSPEC, + CTA_TIMEOUT_ICMPV6_TIMEOUT, + __CTA_TIMEOUT_ICMPV6_MAX +}; +#define CTA_TIMEOUT_ICMPV6_MAX (__CTA_TIMEOUT_ICMPV6_MAX - 1) + +enum ctattr_timeout_gre { + CTA_TIMEOUT_GRE_UNSPEC, + CTA_TIMEOUT_GRE_UNREPLIED, + CTA_TIMEOUT_GRE_REPLIED, + __CTA_TIMEOUT_GRE_MAX +}; +#define CTA_TIMEOUT_GRE_MAX (__CTA_TIMEOUT_GRE_MAX - 1) + +#define CTNL_TIMEOUT_NAME_MAX 32 + +#endif diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index c48b67405aa0..90c67c7db7e9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -83,6 +83,17 @@ struct nf_conntrack_l4proto { size_t nla_size; +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + struct { + size_t obj_size; + int (*nlattr_to_obj)(struct nlattr *tb[], void *data); + int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); + + unsigned int nlattr_max; + const struct nla_policy *nla_policy; + } ctnl_timeout; +#endif + #ifdef CONFIG_SYSCTL struct ctl_table_header **ctl_table_header; struct ctl_table *ctl_table; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 6b801124b31f..7cbe9cb261c2 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -269,6 +269,44 @@ static int icmp_nlattr_tuple_size(void) } #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeout = data; + + if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; + } else { + /* Set default ICMP timeout. */ + *timeout = nf_ct_icmp_timeout; + } + return 0; +} + +static int +icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)); + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { + [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static struct ctl_table_header *icmp_sysctl_header; static struct ctl_table icmp_sysctl_table[] = { @@ -315,6 +353,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .nlattr_to_tuple = icmp_nlattr_to_tuple, .nla_policy = icmp_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = icmp_timeout_nlattr_to_obj, + .obj_to_nlattr = icmp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_ICMP_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = icmp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_header = &icmp_sysctl_header, .ctl_table = icmp_sysctl_table, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 2eb9751eb7a8..92cc9f2931ae 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -276,6 +276,44 @@ static int icmpv6_nlattr_tuple_size(void) } #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeout = data; + + if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; + } else { + /* Set default ICMPv6 timeout. */ + *timeout = nf_ct_icmpv6_timeout; + } + return 0; +} + +static int +icmpv6_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_ICMPV6_TIMEOUT, htonl(*timeout / HZ)); + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = { + [CTA_TIMEOUT_ICMPV6_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static struct ctl_table_header *icmpv6_sysctl_header; static struct ctl_table icmpv6_sysctl_table[] = { @@ -308,6 +346,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .nlattr_to_tuple = icmpv6_nlattr_to_tuple, .nla_policy = icmpv6_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = icmpv6_timeout_nlattr_to_obj, + .obj_to_nlattr = icmpv6_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_ICMP_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = icmpv6_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_header = &icmpv6_sysctl_header, .ctl_table = icmpv6_sysctl_table, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index b895d8b13215..f3efb6570dd9 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -314,6 +314,17 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +config NF_CT_NETLINK_TIMEOUT + tristate 'Connection tracking timeout tuning via Netlink' + select NETFILTER_NETLINK + depends on NETFILTER_ADVANCED + help + This option enables support for connection tracking timeout + fine-grain tuning. This allows you to attach specific timeout + policies to flows, instead of using the global timeout policy. + + If unsure, say `N'. + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a28c2a6563f8..cf7cdd630bdd 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o +obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o # connection tracking helpers nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8ea33598a0a7..24fdce256cb0 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -706,8 +706,60 @@ static int dccp_nlattr_size(void) return nla_total_size(0) /* CTA_PROTOINFO_DCCP */ + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1); } + #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + struct dccp_net *dn = dccp_pernet(&init_net); + unsigned int *timeouts = data; + int i; + + /* set default DCCP timeouts. */ + for (i=0; idccp_timeout[i]; + + /* there's a 1:1 mapping between attributes and protocol states. */ + for (i=CTA_TIMEOUT_DCCP_UNSPEC+1; i +#include + +static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeout = data; + + if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; + else { + /* Set default generic timeout. */ + *timeout = nf_ct_generic_timeout; + } + + return 0; +} + +static int +generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ)); + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { + [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static struct ctl_table_header *generic_sysctl_header; static struct ctl_table generic_sysctl_table[] = { @@ -102,6 +141,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .packet = generic_packet, .get_timeouts = generic_get_timeouts, .new = generic_new, +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = generic_timeout_nlattr_to_obj, + .obj_to_nlattr = generic_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_GENERIC_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = generic_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_header = &generic_sysctl_header, .ctl_table = generic_sysctl_table, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 1bf01c95658b..659648c4b14a 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -292,6 +292,52 @@ static void gre_destroy(struct nf_conn *ct) nf_ct_gre_keymap_destroy(master); } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int gre_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + + /* set default timeouts for GRE. */ + timeouts[GRE_CT_UNREPLIED] = gre_timeouts[GRE_CT_UNREPLIED]; + timeouts[GRE_CT_REPLIED] = gre_timeouts[GRE_CT_REPLIED]; + + if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) { + timeouts[GRE_CT_UNREPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_UNREPLIED])) * HZ; + } + if (tb[CTA_TIMEOUT_GRE_REPLIED]) { + timeouts[GRE_CT_REPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_GRE_REPLIED])) * HZ; + } + return 0; +} + +static int +gre_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeouts = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_GRE_UNREPLIED, + htonl(timeouts[GRE_CT_UNREPLIED] / HZ)); + NLA_PUT_BE32(skb, CTA_TIMEOUT_GRE_REPLIED, + htonl(timeouts[GRE_CT_REPLIED] / HZ)); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = { + [CTA_TIMEOUT_GRE_UNREPLIED] = { .type = NLA_U32 }, + [CTA_TIMEOUT_GRE_REPLIED] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + /* protocol helper struct */ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, @@ -312,6 +358,15 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = gre_timeout_nlattr_to_obj, + .obj_to_nlattr = gre_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_GRE_MAX, + .obj_size = sizeof(unsigned int) * GRE_CT_MAX, + .nla_policy = gre_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ }; static int proto_gre_net_init(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 2a0703371e24..72b5088592dc 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -549,6 +549,57 @@ static int sctp_nlattr_size(void) } #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + int i; + + /* set default SCTP timeouts. */ + for (i=0; i +#include + +static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + int i; + + /* set default TCP timeouts. */ + for (i=0; i +#include + +static int udp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + + /* set default timeouts for UDP. */ + timeouts[UDP_CT_UNREPLIED] = udp_timeouts[UDP_CT_UNREPLIED]; + timeouts[UDP_CT_REPLIED] = udp_timeouts[UDP_CT_REPLIED]; + + if (tb[CTA_TIMEOUT_UDP_UNREPLIED]) { + timeouts[UDP_CT_UNREPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_UNREPLIED])) * HZ; + } + if (tb[CTA_TIMEOUT_UDP_REPLIED]) { + timeouts[UDP_CT_REPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDP_REPLIED])) * HZ; + } + return 0; +} + +static int +udp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeouts = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDP_UNREPLIED, + htonl(timeouts[UDP_CT_UNREPLIED] / HZ)); + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDP_REPLIED, + htonl(timeouts[UDP_CT_REPLIED] / HZ)); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = { + [CTA_TIMEOUT_UDP_UNREPLIED] = { .type = NLA_U32 }, + [CTA_TIMEOUT_UDP_REPLIED] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static unsigned int udp_sysctl_table_users; static struct ctl_table_header *udp_sysctl_header; @@ -211,6 +257,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, .ctl_table_header = &udp_sysctl_header, @@ -240,6 +295,15 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udp_timeout_nlattr_to_obj, + .obj_to_nlattr = udp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDP_MAX, + .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, + .nla_policy = udp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udp_sysctl_table_users, .ctl_table_header = &udp_sysctl_header, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0b32ccb1d515..e0606392cda0 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -156,6 +156,52 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include +#include + +static int udplite_timeout_nlattr_to_obj(struct nlattr *tb[], void *data) +{ + unsigned int *timeouts = data; + + /* set default timeouts for UDPlite. */ + timeouts[UDPLITE_CT_UNREPLIED] = udplite_timeouts[UDPLITE_CT_UNREPLIED]; + timeouts[UDPLITE_CT_REPLIED] = udplite_timeouts[UDPLITE_CT_REPLIED]; + + if (tb[CTA_TIMEOUT_UDPLITE_UNREPLIED]) { + timeouts[UDPLITE_CT_UNREPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_UNREPLIED])) * HZ; + } + if (tb[CTA_TIMEOUT_UDPLITE_REPLIED]) { + timeouts[UDPLITE_CT_REPLIED] = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_UDPLITE_REPLIED])) * HZ; + } + return 0; +} + +static int +udplite_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeouts = data; + + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDPLITE_UNREPLIED, + htonl(timeouts[UDPLITE_CT_UNREPLIED] / HZ)); + NLA_PUT_BE32(skb, CTA_TIMEOUT_UDPLITE_REPLIED, + htonl(timeouts[UDPLITE_CT_REPLIED] / HZ)); + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy +udplite_timeout_nla_policy[CTA_TIMEOUT_UDPLITE_MAX+1] = { + [CTA_TIMEOUT_UDPLITE_UNREPLIED] = { .type = NLA_U32 }, + [CTA_TIMEOUT_UDPLITE_REPLIED] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + #ifdef CONFIG_SYSCTL static unsigned int udplite_sysctl_table_users; static struct ctl_table_header *udplite_sysctl_header; @@ -196,6 +242,16 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udplite_timeout_nlattr_to_obj, + .obj_to_nlattr = udplite_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, + .obj_size = sizeof(unsigned int) * + CTA_TIMEOUT_UDPLITE_MAX, + .nla_policy = udplite_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, .ctl_table_header = &udplite_sysctl_header, @@ -221,6 +277,16 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = udplite_timeout_nlattr_to_obj, + .obj_to_nlattr = udplite_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_UDPLITE_MAX, + .obj_size = sizeof(unsigned int) * + CTA_TIMEOUT_UDPLITE_MAX, + .nla_policy = udplite_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &udplite_sysctl_table_users, .ctl_table_header = &udplite_sysctl_header, diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c new file mode 100644 index 000000000000..b860d5217189 --- /dev/null +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -0,0 +1,398 @@ +/* + * (C) 2012 by Pablo Neira Ayuso + * (C) 2012 by Vyatta Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); + +struct ctnl_timeout { + struct list_head head; + struct rcu_head rcu_head; + atomic_t refcnt; + char name[CTNL_TIMEOUT_NAME_MAX]; + __u16 l3num; + __u8 l4num; + char data[0]; +}; + +static LIST_HEAD(cttimeout_list); + +static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { + [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING }, + [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 }, + [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 }, + [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, +}; + +static int +ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, + struct nf_conntrack_l4proto *l4proto, + const struct nlattr *attr) +{ + int ret = 0; + + if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { + struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; + + nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy); + + ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, &timeout->data); + } + return ret; +} + +static int +cttimeout_new_timeout(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + __u16 l3num; + __u8 l4num; + struct nf_conntrack_l4proto *l4proto; + struct ctnl_timeout *timeout, *matching = NULL; + char *name; + int ret; + + if (!cda[CTA_TIMEOUT_NAME] || + !cda[CTA_TIMEOUT_L3PROTO] || + !cda[CTA_TIMEOUT_L4PROTO] || + !cda[CTA_TIMEOUT_DATA]) + return -EINVAL; + + name = nla_data(cda[CTA_TIMEOUT_NAME]); + l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); + + list_for_each_entry(timeout, &cttimeout_list, head) { + if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + matching = timeout; + break; + } + + l4proto = __nf_ct_l4proto_find(l3num, l4num); + + /* This protocol is not supportted, skip. */ + if (l4proto->l4proto != l4num) + return -EOPNOTSUPP; + + if (matching) { + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + /* You cannot replace one timeout policy by another of + * different kind, sorry. + */ + if (matching->l3num != l3num || + matching->l4num != l4num) + return -EINVAL; + + ret = ctnl_timeout_parse_policy(matching, l4proto, + cda[CTA_TIMEOUT_DATA]); + return ret; + } + return -EBUSY; + } + + timeout = kzalloc(sizeof(struct ctnl_timeout) + + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); + if (timeout == NULL) + return -ENOMEM; + + ret = ctnl_timeout_parse_policy(timeout, l4proto, + cda[CTA_TIMEOUT_DATA]); + if (ret < 0) + goto err; + + strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); + timeout->l3num = l3num; + timeout->l4num = l4num; + atomic_set(&timeout->refcnt, 1); + list_add_tail_rcu(&timeout->head, &cttimeout_list); + + return 0; +err: + kfree(timeout); + return ret; +} + +static int +ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct ctnl_timeout *timeout) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + struct nf_conntrack_l4proto *l4proto; + + event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + NLA_PUT_STRING(skb, CTA_TIMEOUT_NAME, timeout->name); + NLA_PUT_BE16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)); + NLA_PUT_U8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4num); + NLA_PUT_BE32(skb, CTA_TIMEOUT_USE, + htonl(atomic_read(&timeout->refcnt))); + + l4proto = __nf_ct_l4proto_find(timeout->l3num, timeout->l4num); + + /* If the timeout object does not match the layer 4 protocol tracker, + * then skip dumping the data part since we don't know how to + * interpret it. This may happen for UPDlite, SCTP and DCCP since + * you can unload the module. + */ + if (timeout->l4num != l4proto->l4proto) + goto out; + + if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) { + struct nlattr *nest_parms; + int ret; + + nest_parms = nla_nest_start(skb, + CTA_TIMEOUT_DATA | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->data); + if (ret < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + } +out: + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ctnl_timeout *cur, *last; + + if (cb->args[2]) + return 0; + + last = (struct ctnl_timeout *)cb->args[1]; + if (cb->args[1]) + cb->args[1] = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &cttimeout_list, head) { + if (last && cur != last) + continue; + + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + break; + } + } + if (!cb->args[1]) + cb->args[2] = 1; + rcu_read_unlock(); + return skb->len; +} + +static int +cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + int ret = -ENOENT; + char *name; + struct ctnl_timeout *cur; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnl_timeout_dump, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + if (!cda[CTA_TIMEOUT_NAME]) + return -EINVAL; + name = nla_data(cda[CTA_TIMEOUT_NAME]); + + list_for_each_entry(cur, &cttimeout_list, head) { + struct sk_buff *skb2; + + if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } + + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + IPCTNL_MSG_TIMEOUT_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; + } + return ret; +} + +/* try to delete object, fail if it is still in use. */ +static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) +{ + int ret = 0; + + /* we want to avoid races with nf_ct_timeout_find_get. */ + if (atomic_dec_and_test(&timeout->refcnt)) { + /* We are protected by nfnl mutex. */ + list_del_rcu(&timeout->head); + kfree_rcu(timeout, rcu_head); + } else { + /* still in use, restore reference counter. */ + atomic_inc(&timeout->refcnt); + ret = -EBUSY; + } + return ret; +} + +static int +cttimeout_del_timeout(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + char *name; + struct ctnl_timeout *cur; + int ret = -ENOENT; + + if (!cda[CTA_TIMEOUT_NAME]) { + list_for_each_entry(cur, &cttimeout_list, head) + ctnl_timeout_try_del(cur); + + return 0; + } + name = nla_data(cda[CTA_TIMEOUT_NAME]); + + list_for_each_entry(cur, &cttimeout_list, head) { + if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) + continue; + + ret = ctnl_timeout_try_del(cur); + if (ret < 0) + return ret; + + break; + } + return ret; +} + +static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { + [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, + [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, + .attr_count = CTA_TIMEOUT_MAX, + .policy = cttimeout_nla_policy }, +}; + +static const struct nfnetlink_subsystem cttimeout_subsys = { + .name = "conntrack_timeout", + .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT, + .cb_count = IPCTNL_MSG_TIMEOUT_MAX, + .cb = cttimeout_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); + +static int __init cttimeout_init(void) +{ + int ret; + + ret = nfnetlink_subsys_register(&cttimeout_subsys); + if (ret < 0) { + pr_err("cttimeout_init: cannot register cttimeout with " + "nfnetlink.\n"); + goto err_out; + } + return 0; + +err_out: + return ret; +} + +static void __exit cttimeout_exit(void) +{ + struct ctnl_timeout *cur, *tmp; + + pr_info("cttimeout: unregistering from nfnetlink.\n"); + + nfnetlink_subsys_unregister(&cttimeout_subsys); + list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { + list_del_rcu(&cur->head); + /* We are sure that our objects have no clients at this point, + * it's safe to release them all without checking refcnt. + */ + kfree_rcu(cur, rcu_head); + } +} + +module_init(cttimeout_init); +module_exit(cttimeout_exit); -- cgit v1.2.3 From 8b2aaedee4eaa94e816144ed54b9707b96be29f8 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Wed, 7 Mar 2012 14:58:07 +0000 Subject: ipv6: Fix Smatch warning. With commit d6ddef9e641d(IPv6: Fix not join all-router mcast group when forwarding set.) I check 'dev' after it's dereference that leads to a Smatch complaint: net/ipv6/addrconf.c:438 ipv6_add_dev() warn: variable dereferenced before check 'dev' (see line 432) net/ipv6/addrconf.c 431 /* protected by rtnl_lock */ 432 rcu_assign_pointer(dev->ip6_ptr, ndev); ^^^^^^^^^^^^ Old dereference. 433 434 /* Join all-node multicast group */ 435 ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); 436 437 /* Join all-router multicast group if forwarding is set */ 438 if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST)) ^^^ Remove the check to avoid the complaint as 'dev' can't be NULL. Reported-by: Dan Carpenter Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6b8ebc5da0e1..6a3bb6077e19 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -435,7 +435,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes); /* Join all-router multicast group if forwarding is set */ - if (ndev->cnf.forwarding && dev && (dev->flags & IFF_MULTICAST)) + if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST)) ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters); return ndev; -- cgit v1.2.3 From 1f85851e17b64cabd089a8a8839dddebc627948c Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 19 Mar 2012 22:36:10 +0000 Subject: ipv6: fix incorrent ipv6 ipsec packet fragment Since commit 299b0767(ipv6: Fix IPsec slowpath fragmentation problem) In func ip6_append_data,after call skb_put(skb, fraglen + dst_exthdrlen) the skb->len contains dst_exthdrlen,and we don't reduce dst_exthdrlen at last This will make fraggap>0 in next "while cycle",and cause the size of skb incorrent Fix this by reserve headroom for dst_exthdrlen. Signed-off-by: Gao feng Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7a98fc2a5d97..b7ca46161cb9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1414,8 +1414,9 @@ alloc_new_skb: */ skb->ip_summed = csummode; skb->csum = 0; - /* reserve for fragmentation */ - skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); + /* reserve for fragmentation and ipsec header */ + skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + + dst_exthdrlen); if (sk->sk_type == SOCK_DGRAM) skb_shinfo(skb)->tx_flags = tx_flags; @@ -1423,9 +1424,9 @@ alloc_new_skb: /* * Find where to start putting bytes */ - data = skb_put(skb, fraglen + dst_exthdrlen); - skb_set_network_header(skb, exthdrlen + dst_exthdrlen); - data += fragheaderlen + dst_exthdrlen; + data = skb_put(skb, fraglen); + skb_set_network_header(skb, exthdrlen); + data += fragheaderlen; skb->transport_header = (skb->network_header + fragheaderlen); if (fraggap) { -- cgit v1.2.3