diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-08 20:40:42 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-08 20:40:42 -0500 |
commit | 9f0e896f35020d1e0e513b365a62fb5e06066764 (patch) | |
tree | 612b09a6f772149958ae12244c5da8b37337817a /net/ipv6 | |
parent | 0c3b34d804947fef7fb9e74912b7b7563729231e (diff) | |
parent | f998b6b10144cd9809da6af02758615f789e8aa1 (diff) | |
download | linux-9f0e896f35020d1e0e513b365a62fb5e06066764.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter/IPVS updates for net-next
The following patchset contains Netfilter/IPVS updates for your
net-next tree:
1) Free hooks via call_rcu to speed up netns release path, from
Florian Westphal.
2) Reduce memory footprint of hook arrays, skip allocation if family is
not present - useful in case decnet support is not compiled built-in.
Patches from Florian Westphal.
3) Remove defensive check for malformed IPv4 - including ihl field - and
IPv6 headers in x_tables and nf_tables.
4) Add generic flow table offload infrastructure for nf_tables, this
includes the netlink control plane and support for IPv4, IPv6 and
mixed IPv4/IPv6 dataplanes. This comes with NAT support too. This
patchset adds the IPS_OFFLOAD conntrack status bit to indicate that
this flow has been offloaded.
5) Add secpath matching support for nf_tables, from Florian.
6) Save some code bytes in the fast path for the nf_tables netdev,
bridge and inet families.
7) Allow one single NAT hook per point and do not allow to register NAT
hooks in nf_tables before the conntrack hook, patches from Florian.
8) Seven patches to remove the struct nf_af_info abstraction, instead
we perform direct calls for IPv4 which is faster. IPv6 indirections
are still needed to avoid dependencies with the 'ipv6' module, but
these now reside in struct nf_ipv6_ops.
9) Seven patches to handle NFPROTO_INET from the Netfilter core,
hence we can remove specific code in nf_tables to handle this
pseudofamily.
10) No need for synchronize_net() call for nf_queue after conversion
to hook arrays. Also from Florian.
11) Call cond_resched_rcu() when dumping large sets in ipset to avoid
softlockup. Again from Florian.
12) Pass lockdep_nfnl_is_held() to rcu_dereference_protected(), patch
from Florian Westphal.
13) Fix matching of counters in ipset, from Jozsef Kadlecsik.
14) Missing nfnl lock protection in the ip_set_net_exit path, also
from Jozsef.
15) Move connlimit code that we can reuse from nf_tables into
nf_conncount, from Florian Westhal.
And asorted cleanups:
16) Get rid of nft_dereference(), it only has one single caller.
17) Add nft_set_is_anonymous() helper function.
18) Remove NF_ARP_FORWARD leftover chain definition in nf_tables_arp.
19) Remove unnecessary comments in nf_conntrack_h323_asn1.c
From Varsha Rao.
20) Remove useless parameters in frag_safe_skb_hp(), from Gao Feng.
21) Constify layer 4 conntrack protocol definitions, function
parameters to register/unregister these protocol trackers, and
timeouts. Patches from Florian Westphal.
22) Remove nlattr_size indirection, from Florian Westphal.
23) Add fall-through comments as -Wimplicit-fallthrough needs this,
from Gustavo A. R. Silva.
24) Use swap() macro to exchange values in ipset, patch from
Gustavo A. R. Silva.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/ip6_output.c | 3 | ||||
-rw-r--r-- | net/ipv6/netfilter.c | 44 | ||||
-rw-r--r-- | net/ipv6/netfilter/Kconfig | 8 | ||||
-rw-r--r-- | net/ipv6/netfilter/Makefile | 3 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 26 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_mangle.c | 8 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6table_nat.c | 4 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 7 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_flow_table_ipv6.c | 278 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 8 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_tables_ipv6.c | 35 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_chain_nat_ipv6.c | 3 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_chain_route_ipv6.c | 3 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_fib_ipv6.c | 12 |
15 files changed, 335 insertions, 111 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index bcdb615aed6e..19adad6d90bc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } -static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) { unsigned int mtu; struct inet6_dev *idev; @@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) return mtu; } +EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward); static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 39970e212ad5..d95ceca7ff8f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_route_me_harder); -/* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - -struct ip6_rt_info { - struct in6_addr daddr; - struct in6_addr saddr; - u_int32_t mark; -}; - -static void nf_ip6_saveroute(const struct sk_buff *skb, - struct nf_queue_entry *entry) -{ - struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct ipv6hdr *iph = ipv6_hdr(skb); - - rt_info->daddr = iph->daddr; - rt_info->saddr = iph->saddr; - rt_info->mark = skb->mark; - } -} - -static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, +static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(net, skb); + return ip6_route_me_harder(entry->state.net, skb); } return 0; } @@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, }; static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment -}; - -static const struct nf_afinfo nf_ip6_afinfo = { - .family = AF_INET6, + .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input, + .fragment = ip6_fragment, .checksum = nf_ip6_checksum, .checksum_partial = nf_ip6_checksum_partial, .route = nf_ip6_route, - .saveroute = nf_ip6_saveroute, .reroute = nf_ip6_reroute, - .route_key_size = sizeof(struct ip6_rt_info), }; int __init ipv6_netfilter_init(void) { RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); - return nf_register_afinfo(&nf_ip6_afinfo); + return 0; } /* This can be called from inet6_init() on errors, so it cannot @@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void) void ipv6_netfilter_fini(void) { RCU_INIT_POINTER(nf_ipv6_ops, NULL); - nf_unregister_afinfo(&nf_ip6_afinfo); } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6acb2eecd986..806e95375ec8 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -71,6 +71,14 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES +config NF_FLOW_TABLE_IPV6 + select NF_FLOW_TABLE + tristate "Netfilter flow table IPv6 module" + help + This option adds the flow table IPv6 support. + + To compile it as a module, choose M here. + config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index c6ee0cdd0ba9..95611c4b39b0 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o +# flow table support +obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o + # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1d7ae9366335..6ebbef2dfb60 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -991,9 +991,8 @@ static int get_info(struct net *net, void __user *user, if (compat) xt_compat_lock(AF_INET6); #endif - t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), - "ip6table_%s", name); - if (t) { + t = xt_request_find_table_lock(net, AF_INET6, name); + if (!IS_ERR(t)) { struct ip6t_getinfo info; const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT @@ -1023,7 +1022,7 @@ static int get_info(struct net *net, void __user *user, xt_table_unlock(t); module_put(t->me); } else - ret = -ENOENT; + ret = PTR_ERR(t); #ifdef CONFIG_COMPAT if (compat) xt_compat_unlock(AF_INET6); @@ -1049,7 +1048,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); - if (t) { + if (!IS_ERR(t)) { struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1060,7 +1059,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); return ret; } @@ -1083,10 +1082,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, goto out; } - t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name), - "ip6table_%s", name); - if (!t) { - ret = -ENOENT; + t = xt_request_find_table_lock(net, AF_INET6, name); + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free_newinfo_counters_untrans; } @@ -1199,8 +1197,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, if (IS_ERR(paddc)) return PTR_ERR(paddc); t = xt_find_table_lock(net, AF_INET6, tmp.name); - if (!t) { - ret = -ENOENT; + if (IS_ERR(t)) { + ret = PTR_ERR(t); goto free; } @@ -1636,7 +1634,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); - if (t) { + if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); @@ -1650,7 +1648,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, module_put(t->me); xt_table_unlock(t); } else - ret = -ENOENT; + ret = PTR_ERR(t); xt_compat_unlock(AF_INET6); return ret; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 2b1a9dcdbcb3..b0524b18c4fb 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) u_int8_t hop_limit; u_int32_t flowlabel, mark; int err; -#if 0 - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - net_warn_ratelimited("ip6t_hook: happy cracking\n"); - return NF_ACCEPT; - } -#endif /* save source/dest address, mark, hoplimit, flowlabel, priority, */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 991512576c8c..47306e45a80a 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6table_nat_in, .pf = NFPROTO_IPV6, + .nat_hook = true, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, }, @@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6table_nat_out, .pf = NFPROTO_IPV6, + .nat_hook = true, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, }, @@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { { .hook = ip6table_nat_local_fn, .pf = NFPROTO_IPV6, + .nat_hook = true, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, }, /* After packet filtering, change source */ { .hook = ip6table_nat_fn, + .nat_hook = true, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 3b80a38f62b8..11a313fd9273 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) { - net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); - return NF_ACCEPT; - } return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } @@ -368,7 +363,7 @@ static struct nf_sockopt_ops so_getorigdst6 = { .owner = THIS_MODULE, }; -static struct nf_conntrack_l4proto *builtin_l4proto6[] = { +static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = { &nf_conntrack_l4proto_tcp6, &nf_conntrack_l4proto_udp6, &nf_conntrack_l4proto_icmpv6, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 3ac0d826afc4..2548e2c8aedd 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -27,7 +27,7 @@ #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> #include <net/netfilter/nf_log.h> -static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) { @@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net) return &net->ct.nf_ct_proto.icmpv6.pn; } -struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = +const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c new file mode 100644 index 000000000000..0c3b9d32f64f --- /dev/null +++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c @@ -0,0 +1,278 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/rhashtable.h> +#include <linux/ipv6.h> +#include <linux/netdevice.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/neighbour.h> +#include <net/netfilter/nf_flow_table.h> +#include <net/netfilter/nf_tables.h> +/* For layer 4 checksum field offset. */ +#include <linux/tcp.h> +#include <linux/udp.h> + +static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) +{ + struct tcphdr *tcph; + + if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) || + skb_try_make_writable(skb, thoff + sizeof(*tcph))) + return -1; + + tcph = (void *)(skb_network_header(skb) + thoff); + inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, + new_addr->s6_addr32, true); + + return 0; +} + +static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) +{ + struct udphdr *udph; + + if (!pskb_may_pull(skb, thoff + sizeof(*udph)) || + skb_try_make_writable(skb, thoff + sizeof(*udph))) + return -1; + + udph = (void *)(skb_network_header(skb) + thoff); + if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { + inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, + new_addr->s6_addr32, true); + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + return 0; +} + +static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, struct in6_addr *addr, + struct in6_addr *new_addr) +{ + switch (ip6h->nexthdr) { + case IPPROTO_TCP: + if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + case IPPROTO_UDP: + if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) + return NF_DROP; + break; + } + + return 0; +} + +static int nf_flow_snat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6; + ip6h->saddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; + ip6h->daddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +} + +static int nf_flow_dnat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) +{ + struct in6_addr addr, new_addr; + + switch (dir) { + case FLOW_OFFLOAD_DIR_ORIGINAL: + addr = ip6h->daddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6; + ip6h->daddr = new_addr; + break; + case FLOW_OFFLOAD_DIR_REPLY: + addr = ip6h->saddr; + new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; + ip6h->saddr = new_addr; + break; + default: + return -1; + } + + return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); +} + +static int nf_flow_nat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, + enum flow_offload_tuple_dir dir) +{ + struct ipv6hdr *ip6h = ipv6_hdr(skb); + unsigned int thoff = sizeof(*ip6h); + + if (flow->flags & FLOW_OFFLOAD_SNAT && + (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || + nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + return -1; + if (flow->flags & FLOW_OFFLOAD_DNAT && + (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || + nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + return -1; + + return 0; +} + +static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, + struct flow_offload_tuple *tuple) +{ + struct flow_ports *ports; + struct ipv6hdr *ip6h; + unsigned int thoff; + + if (!pskb_may_pull(skb, sizeof(*ip6h))) + return -1; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_TCP && + ip6h->nexthdr != IPPROTO_UDP) + return -1; + + thoff = sizeof(*ip6h); + if (!pskb_may_pull(skb, thoff + sizeof(*ports))) + return -1; + + ports = (struct flow_ports *)(skb_network_header(skb) + thoff); + + tuple->src_v6 = ip6h->saddr; + tuple->dst_v6 = ip6h->daddr; + tuple->src_port = ports->source; + tuple->dst_port = ports->dest; + tuple->l3proto = AF_INET6; + tuple->l4proto = ip6h->nexthdr; + tuple->iifidx = dev->ifindex; + + return 0; +} + +/* Based on ip_exceeds_mtu(). */ +static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + return false; + + return true; +} + +static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt) +{ + u32 mtu; + + mtu = ip6_dst_mtu_forward(&rt->dst); + if (__nf_flow_exceeds_mtu(skb, mtu)) + return true; + + return false; +} + +unsigned int +nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct flow_offload_tuple_rhash *tuplehash; + struct nf_flowtable *flow_table = priv; + struct flow_offload_tuple tuple = {}; + enum flow_offload_tuple_dir dir; + struct flow_offload *flow; + struct net_device *outdev; + struct in6_addr *nexthop; + struct ipv6hdr *ip6h; + struct rt6_info *rt; + + if (skb->protocol != htons(ETH_P_IPV6)) + return NF_ACCEPT; + + if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) + return NF_ACCEPT; + + tuplehash = flow_offload_lookup(flow_table, &tuple); + if (tuplehash == NULL) + return NF_ACCEPT; + + outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); + if (!outdev) + return NF_ACCEPT; + + dir = tuplehash->tuple.dir; + flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); + + rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; + if (unlikely(nf_flow_exceeds_mtu(skb, rt))) + return NF_ACCEPT; + + if (skb_try_make_writable(skb, sizeof(*ip6h))) + return NF_DROP; + + if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && + nf_flow_nat_ipv6(flow, skb, dir) < 0) + return NF_DROP; + + flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; + ip6h = ipv6_hdr(skb); + ip6h->hop_limit--; + + skb->dev = outdev; + nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); + neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); + + return NF_STOLEN; +} +EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook); + +static struct nf_flowtable_type flowtable_ipv6 = { + .family = NFPROTO_IPV6, + .params = &nf_flow_offload_rhash_params, + .gc = nf_flow_offload_work_gc, + .hook = nf_flow_offload_ipv6_hook, + .owner = THIS_MODULE, +}; + +static int __init nf_flow_ipv6_module_init(void) +{ + nft_register_flowtable_type(&flowtable_ipv6); + + return 0; +} + +static void __exit nf_flow_ipv6_module_exit(void) +{ + nft_unregister_flowtable_type(&flowtable_ipv6); +} + +module_init(nf_flow_ipv6_module_init); +module_exit(nf_flow_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NF_FLOWTABLE(AF_INET6); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 1d2fb9267d6f..bed57ee65f7b 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb, #endif unsigned int ret; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) - return NF_ACCEPT; - ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && @@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, unsigned int ret; int err; - /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) - return NF_ACCEPT; - ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index d6e4ba5de916..9cd45b964123 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,39 +22,17 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv6_output(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (unlikely(skb->len < sizeof(struct ipv6hdr))) { - if (net_ratelimit()) - pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " - "packet\n"); - return NF_ACCEPT; - } - - return nft_do_chain_ipv6(priv, skb, state); -} - -struct nft_af_info nft_af_ipv6 __read_mostly = { +static struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, - .nops = 1, - .hooks = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_ipv6_output, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, }; -EXPORT_SYMBOL_GPL(nft_af_ipv6); static int nf_tables_ipv6_init_net(struct net *net) { @@ -94,6 +72,13 @@ static const struct nf_chain_type filter_ipv6 = { (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, + [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, + }, }; static int __init nf_tables_ipv6_init(void) diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 443cd306c0b0..73fe2bd13fcf 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); return nft_do_chain(&pkt, priv); } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index f2727475895e..11d3c3b9aa18 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv, u32 mark, flowlabel; int err; - nft_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 54b5899543ef..cc5174c7254c 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, { const struct net_device *dev = NULL; const struct nf_ipv6_ops *v6ops; - const struct nf_afinfo *afinfo; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - afinfo = nf_get_afinfo(NFPROTO_IPV6); - if (!afinfo) + v6ops = nf_get_ipv6_ops(); + if (!v6ops) return RTN_UNREACHABLE; if (priv->flags & NFTA_FIB_F_IIF) @@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - v6ops = nf_get_ipv6_ops(); - if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt, - flowi6_to_flowi(&fl6), false); + route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + flowi6_to_flowi(&fl6), false); if (route_err) goto err; |