From 6f7edb4881bf51300060e89915926e070ace8c4d Mon Sep 17 00:00:00 2001 From: "Catalin(ux) M. BOIE" Date: Tue, 5 Jan 2010 05:50:24 +0100 Subject: IPVS: Allow boot time change of hash size I was very frustrated about the fact that I have to recompile the kernel to change the hash size. So, I created this patch. If IPVS is built-in you can append ip_vs.conn_tab_bits=?? to kernel command line, or, if you built IPVS as modules, you can add options ip_vs conn_tab_bits=??. To keep everything backward compatible, you still can select the size at compile time, and that will be used as default. It has been about a year since this patch was originally posted and subsequently dropped on the basis of insufficient test data. Mark Bergsma has provided the following test results which seem to strongly support the need for larger hash table sizes: We do however run into the same problem with the default setting (212 = 4096 entries), as most of our LVS balancers handle around a million connections/SLAB entries at any point in time (around 100-150 kpps load). With only 4096 hash table entries this implies that each entry consists of a linked list of 256 connections *on average*. To provide some statistics, I did an oprofile run on an 2.6.31 kernel, with both the default 4096 table size, and the same kernel recompiled with IP_VS_CONN_TAB_BITS set to 18 (218 = 262144 entries). I built a quick test setup with a part of Wikimedia/Wikipedia's live traffic mirrored by the switch to the test host. With the default setting, at ~ 120 kpps packet load we saw a typical %si CPU usage of around 30-35%, and oprofile reported a hot spot in ip_vs_conn_in_get: samples % image name app name symbol name 1719761 42.3741 ip_vs.ko ip_vs.ko ip_vs_conn_in_get 302577 7.4554 bnx2 bnx2 /bnx2 181984 4.4840 vmlinux vmlinux __ticket_spin_lock 128636 3.1695 vmlinux vmlinux ip_route_input 74345 1.8318 ip_vs.ko ip_vs.ko ip_vs_conn_out_get 68482 1.6874 vmlinux vmlinux mwait_idle After loading the recompiled kernel with 218 entries, %si CPU usage dropped in half to around 12-18%, and oprofile looks much healthier, with only 7% spent in ip_vs_conn_in_get: samples % image name app name symbol name 265641 14.4616 bnx2 bnx2 /bnx2 143251 7.7986 vmlinux vmlinux __ticket_spin_lock 140661 7.6576 ip_vs.ko ip_vs.ko ip_vs_conn_in_get 94364 5.1372 vmlinux vmlinux mwait_idle 86267 4.6964 vmlinux vmlinux ip_route_input [ horms@verge.net.au: trivial up-port and minor style fixes ] Signed-off-by: Catalin(ux) M. BOIE Cc: Mark Bergsma Signed-off-by: Simon Horman Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8dc3296b7bea..a816c37417bb 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -26,6 +26,11 @@ #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ + +/* Connections' size value needed by ip_vs_ctl.c */ +extern int ip_vs_conn_tab_size; + + struct ip_vs_iphdr { int len; __u8 protocol; @@ -592,17 +597,6 @@ extern void ip_vs_init_hash_table(struct list_head *table, int rows); * (from ip_vs_conn.c) */ -/* - * IPVS connection entry hash table - */ -#ifndef CONFIG_IP_VS_TAB_BITS -#define CONFIG_IP_VS_TAB_BITS 12 -#endif - -#define IP_VS_CONN_TAB_BITS CONFIG_IP_VS_TAB_BITS -#define IP_VS_CONN_TAB_SIZE (1 << IP_VS_CONN_TAB_BITS) -#define IP_VS_CONN_TAB_MASK (IP_VS_CONN_TAB_SIZE - 1) - enum { IP_VS_DIR_INPUT = 0, IP_VS_DIR_OUTPUT, -- cgit v1.2.3 From cd8c20b650f49354722b8cc1f03320b004815a0a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 13 Jan 2010 16:02:14 +0100 Subject: netfilter: nfnetlink: netns support Make nfnl socket per-petns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 8 ++--- include/net/net_namespace.h | 2 ++ net/netfilter/nf_conntrack_netlink.c | 13 ++++---- net/netfilter/nfnetlink.c | 65 +++++++++++++++++++++++------------- net/netfilter/nfnetlink_log.c | 3 +- net/netfilter/nfnetlink_queue.c | 2 +- 6 files changed, 58 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 49d321f3ccd2..53923868c9bd 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -73,11 +73,11 @@ struct nfnetlink_subsystem { extern int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); extern int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n); -extern int nfnetlink_has_listeners(unsigned int group); -extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, +extern int nfnetlink_has_listeners(struct net *net, unsigned int group); +extern int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags); -extern void nfnetlink_set_err(u32 pid, u32 group, int error); -extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +extern void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error); +extern int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags); extern void nfnl_lock(void); extern void nfnl_unlock(void); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index f307e133d14c..82b7be4db89a 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -81,6 +81,8 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif + struct sock *nfnl; + struct sock *nfnl_stash; #endif #ifdef CONFIG_XFRM struct netns_xfrm xfrm; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 59d8064eb522..d4c5d06677f9 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -482,7 +482,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) } else return 0; - if (!item->report && !nfnetlink_has_listeners(group)) + if (!item->report && !nfnetlink_has_listeners(&init_net, group)) return 0; skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); @@ -559,7 +559,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + err = nfnetlink_send(skb, &init_net, item->pid, group, item->report, + GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -571,7 +572,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, group, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, group, -ENOBUFS); return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1539,7 +1540,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) return 0; if (!item->report && - !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) + !nfnetlink_has_listeners(&init_net, NFNLGRP_CONNTRACK_EXP_NEW)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1562,7 +1563,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + nfnetlink_send(skb, &init_net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report, GFP_ATOMIC); return 0; @@ -1572,7 +1573,7 @@ nla_put_failure: nlmsg_failure: kfree_skb(skb); errout: - nfnetlink_set_err(0, 0, -ENOBUFS); + nfnetlink_set_err(&init_net, 0, 0, -ENOBUFS); return 0; } #endif diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index eedc0c1ac7a4..8eb0cc23ada3 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -40,7 +40,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; -static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); @@ -101,34 +100,35 @@ nfnetlink_find_client(u_int16_t type, const struct nfnetlink_subsystem *ss) return &ss->cb[cb_id]; } -int nfnetlink_has_listeners(unsigned int group) +int nfnetlink_has_listeners(struct net *net, unsigned int group) { - return netlink_has_listeners(nfnl, group); + return netlink_has_listeners(net->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, flags); + return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); -void nfnetlink_set_err(u32 pid, u32 group, int error) +void nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) { - netlink_set_err(nfnl, pid, group, error); + netlink_set_err(net->nfnl, pid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) { - return netlink_unicast(nfnl, skb, pid, flags); + return netlink_unicast(net->nfnl, skb, pid, flags); } EXPORT_SYMBOL_GPL(nfnetlink_unicast); /* Process one complete nfnetlink message. */ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); + err = nc->call(net->nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; @@ -184,26 +184,45 @@ static void nfnetlink_rcv(struct sk_buff *skb) nfnl_unlock(); } -static void __exit nfnetlink_exit(void) +static int __net_init nfnetlink_net_init(struct net *net) { - printk("Removing netfilter NETLINK layer.\n"); - netlink_kernel_release(nfnl); - return; + struct sock *nfnl; + + nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) + return -ENOMEM; + net->nfnl_stash = nfnl; + rcu_assign_pointer(net->nfnl, nfnl); + return 0; } -static int __init nfnetlink_init(void) +static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { - printk("Netfilter messages via NETLINK v%s.\n", nfversion); + struct net *net; - nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, - nfnetlink_rcv, NULL, THIS_MODULE); - if (!nfnl) { - printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -ENOMEM; - } + list_for_each_entry(net, net_exit_list, exit_list) + rcu_assign_pointer(net->nfnl, NULL); + synchronize_net(); + list_for_each_entry(net, net_exit_list, exit_list) + netlink_kernel_release(net->nfnl_stash); +} - return 0; +static struct pernet_operations nfnetlink_net_ops = { + .init = nfnetlink_net_init, + .exit_batch = nfnetlink_net_exit_batch, +}; + +static int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + return register_pernet_subsys(&nfnetlink_net_ops); } +static void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + unregister_pernet_subsys(&nfnetlink_net_ops); +} module_init(nfnetlink_init); module_exit(nfnetlink_exit); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9de0470d557e..285e9029a9ff 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -323,7 +323,8 @@ __nfulnl_send(struct nfulnl_instance *inst) NLMSG_DONE, sizeof(struct nfgenmsg)); - status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + MSG_DONTWAIT); inst->qlen = 0; inst->skb = NULL; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7e3fa410641e..5c589b27d6eb 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -420,7 +420,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; -- cgit v1.2.3 From e9d3897cc2205eec8b7afcc022e4730914b4f48c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:08:37 +0100 Subject: netfilter: netns: #ifdef ->iptable_security, ->ip6table_security 'security' tables depend on SECURITY, so ifdef them. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/ipv4.h | 2 ++ include/net/netns/ipv6.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d6258..8098f6b8319d 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -37,7 +37,9 @@ struct netns_ipv4 { struct xt_table *iptable_mangle; struct xt_table *iptable_raw; struct xt_table *arptable_filter; +#ifdef CONFIG_SECURITY struct xt_table *iptable_security; +#endif struct xt_table *nat_table; struct hlist_head *nat_bysource; int nat_vmalloced; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index dfeb2d7c425b..1f11ebc22151 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -36,7 +36,9 @@ struct netns_ipv6 { struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; struct xt_table *ip6table_raw; +#ifdef CONFIG_SECURITY struct xt_table *ip6table_security; +#endif #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; -- cgit v1.2.3 From a83d8e8d099fc373a5ca7112ad08c553bb2c180f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:21:13 +0100 Subject: netfilter: xtables: add struct xt_mtchk_param::net Some complex match modules (like xt_hashlimit/xt_recent) want netns information at constructor and destructor time. We propably can play games at match destruction time, because netns can be passed in object, but I think it's cleaner to explicitly pass netns. Add ->net, make sure it's set from ebtables/iptables/ip6tables code. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 1 + net/bridge/netfilter/ebtables.c | 14 +++++++++----- net/ipv4/netfilter/ip_tables.c | 24 ++++++++++++++---------- net/ipv6/netfilter/ip6_tables.c | 14 ++++++++------ 4 files changed, 32 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 378f27ae7772..88261b9829a7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -205,6 +205,7 @@ struct xt_match_param { * @hook_mask: via which hooks the new rule is reachable */ struct xt_mtchk_param { + struct net *net; const char *table; const void *entryinfo; const struct xt_match *match; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index bd1c65425d4f..c77bab986696 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -619,7 +619,9 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) } static inline int -ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, +ebt_check_entry(struct ebt_entry *e, + struct net *net, + struct ebt_table_info *newinfo, const char *name, unsigned int *cnt, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { @@ -671,6 +673,7 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, } i = 0; + mtpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -808,7 +811,8 @@ letscontinue: } /* do the parsing of the table/chains/entries/matches/watchers/targets, heh */ -static int translate_table(char *name, struct ebt_table_info *newinfo) +static int translate_table(struct net *net, char *name, + struct ebt_table_info *newinfo) { unsigned int i, j, k, udc_cnt; int ret; @@ -917,7 +921,7 @@ static int translate_table(char *name, struct ebt_table_info *newinfo) /* used to know what we need to clean up if something goes wrong */ i = 0; ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_check_entry, newinfo, name, &i, cl_s, udc_cnt); + ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, ebt_cleanup_entry, &i); @@ -1017,7 +1021,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) if (ret != 0) goto free_counterstmp; - ret = translate_table(tmp.name, newinfo); + ret = translate_table(net, tmp.name, newinfo); if (ret != 0) goto free_counterstmp; @@ -1154,7 +1158,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table) newinfo->hook_entry[i] = p + ((char *)repl->hook_entry[i] - repl->entries); } - ret = translate_table(repl->name, newinfo); + ret = translate_table(net, repl->name, newinfo); if (ret != 0) { BUGPRINT("Translate_table failed\n"); goto free_chainstack; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 572330a552ef..a069d72d9482 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -661,8 +661,8 @@ static int check_target(struct ipt_entry *e, const char *name) } static int -find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size, unsigned int *i) { struct ipt_entry_target *t; struct xt_target *target; @@ -675,6 +675,7 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; @@ -798,7 +799,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, +translate_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, void *entry0, @@ -860,7 +862,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + find_check_entry, net, name, size, &i); if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, @@ -1303,7 +1305,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, + ret = translate_table(net, tmp.name, tmp.valid_hooks, newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) @@ -1655,7 +1657,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, } static int -compat_check_entry(struct ipt_entry *e, const char *name, +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int *i) { struct xt_mtchk_param mtpar; @@ -1663,6 +1665,7 @@ compat_check_entry(struct ipt_entry *e, const char *name, int ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ip; mtpar.hook_mask = e->comefrom; @@ -1684,7 +1687,8 @@ compat_check_entry(struct ipt_entry *e, const char *name, } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1773,7 +1777,7 @@ translate_compat_table(const char *name, i = 0; ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + net, name, &i); if (ret) { j -= i; COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, @@ -1833,7 +1837,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -2086,7 +2090,7 @@ struct xt_table *ipt_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, + ret = translate_table(net, table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 480d7f8c9802..a825940a92ef 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -693,8 +693,8 @@ static int check_target(struct ip6t_entry *e, const char *name) } static int -find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, - unsigned int *i) +find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, + unsigned int size, unsigned int *i) { struct ip6t_entry_target *t; struct xt_target *target; @@ -707,6 +707,7 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, return ret; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; @@ -830,7 +831,8 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, +translate_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info *newinfo, void *entry0, @@ -892,7 +894,7 @@ translate_table(const char *name, /* Finally, each sanity check must pass */ i = 0; ret = IP6T_ENTRY_ITERATE(entry0, newinfo->size, - find_check_entry, name, size, &i); + find_check_entry, net, name, size, &i); if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, @@ -1336,7 +1338,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_table(tmp.name, tmp.valid_hooks, + ret = translate_table(net, tmp.name, tmp.valid_hooks, newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); if (ret != 0) @@ -2121,7 +2123,7 @@ struct xt_table *ip6t_register_table(struct net *net, loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, + ret = translate_table(net, table->name, table->valid_hooks, newinfo, loc_cpu_entry, repl->size, repl->num_entries, repl->hook_entry, -- cgit v1.2.3 From f54e9367f8499a9bf6b2afbc0dce63e1d53c525a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 18 Jan 2010 08:25:47 +0100 Subject: netfilter: xtables: add struct xt_mtdtor_param::net Add ->net to match destructor list like ->net in constructor list. Make sure it's set in ebtables/iptables/ip6tables, this requires to propagate netns up to *_unregister_table(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 1 + include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 19 ++++++++-------- net/ipv4/netfilter/ip_tables.c | 25 +++++++++++---------- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 2 +- net/ipv4/netfilter/iptable_raw.c | 2 +- net/ipv4/netfilter/iptable_security.c | 2 +- net/ipv4/netfilter/nf_nat_rule.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 37 +++++++++++++++++-------------- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 2 +- net/ipv6/netfilter/ip6table_raw.c | 2 +- net/ipv6/netfilter/ip6table_security.c | 2 +- 19 files changed, 59 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 88261b9829a7..3caf5e151102 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -216,6 +216,7 @@ struct xt_mtchk_param { /* Match destructor parameters */ struct xt_mtdtor_param { + struct net *net; const struct xt_match *match; void *matchinfo; u_int8_t family; diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 3cc40c131cc3..1c6f0c5f530e 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -289,7 +289,7 @@ struct ebt_table { ~(__alignof__(struct ebt_replace)-1)) extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); -extern void ebt_unregister_table(struct ebt_table *table); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 27b3f5807305..8d1f273d350b 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -242,7 +242,7 @@ extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl); -extern void ipt_unregister_table(struct xt_table *table); +extern void ipt_unregister_table(struct net *net, struct xt_table *table); /* Standard entry. */ struct ipt_standard { diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b31050d20ae4..d2952d2fa658 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -300,7 +300,7 @@ extern void ip6t_init(void) __init; extern struct xt_table *ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl); -extern void ip6t_unregister_table(struct xt_table *table); +extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d32ab13e728c..ae3f106c3908 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -71,7 +71,7 @@ static int __net_init broute_net_init(struct net *net) static void __net_exit broute_net_exit(struct net *net) { - ebt_unregister_table(net->xt.broute_table); + ebt_unregister_table(net, net->xt.broute_table); } static struct pernet_operations broute_net_ops = { diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 60b1a6ca7185..42e6bd094574 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -107,7 +107,7 @@ static int __net_init frame_filter_net_init(struct net *net) static void __net_exit frame_filter_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_filter); + ebt_unregister_table(net, net->xt.frame_filter); } static struct pernet_operations frame_filter_net_ops = { diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 4a98804203b0..6dc2f878ae05 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -107,7 +107,7 @@ static int __net_init frame_nat_net_init(struct net *net) static void __net_exit frame_nat_net_exit(struct net *net) { - ebt_unregister_table(net->xt.frame_nat); + ebt_unregister_table(net, net->xt.frame_nat); } static struct pernet_operations frame_nat_net_ops = { diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c77bab986696..1aa0e4c1f52d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -561,13 +561,14 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, } static inline int -ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) +ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.match; par.matchinfo = m->data; par.family = NFPROTO_BRIDGE; @@ -595,7 +596,7 @@ ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) } static inline int -ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) +ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) { struct xt_tgdtor_param par; struct ebt_entry_target *t; @@ -606,7 +607,7 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) if (cnt && (*cnt)-- == 0) return 1; EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); - EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); par.target = t->u.target; @@ -731,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e, cleanup_watchers: EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); cleanup_matches: - EBT_MATCH_ITERATE(e, ebt_cleanup_match, &i); + EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; } @@ -924,7 +925,7 @@ static int translate_table(struct net *net, char *name, ebt_check_entry, net, newinfo, name, &i, cl_s, udc_cnt); if (ret != 0) { EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, &i); + ebt_cleanup_entry, net, &i); } vfree(cl_s); return ret; @@ -1074,7 +1075,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) /* decrease module count and free resources */ EBT_ENTRY_ITERATE(table->entries, table->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); vfree(table->entries); if (table->chainstack) { @@ -1091,7 +1092,7 @@ free_unlock: mutex_unlock(&ebt_mutex); free_iterate: EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); free_counterstmp: vfree(counterstmp); /* can be initialized in translate_table() */ @@ -1208,7 +1209,7 @@ out: return ERR_PTR(ret); } -void ebt_unregister_table(struct ebt_table *table) +void ebt_unregister_table(struct net *net, struct ebt_table *table) { int i; @@ -1220,7 +1221,7 @@ void ebt_unregister_table(struct ebt_table *table) list_del(&table->list); mutex_unlock(&ebt_mutex); EBT_ENTRY_ITERATE(table->private->entries, table->private->entries_size, - ebt_cleanup_entry, NULL); + ebt_cleanup_entry, net, NULL); if (table->private->nentries) module_put(table->me); vfree(table->private->entries); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a069d72d9482..cfaba0e2e6fc 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -553,13 +553,14 @@ mark_source_chains(struct xt_table_info *newinfo, } static int -cleanup_match(struct ipt_entry_match *m, unsigned int *i) +cleanup_match(struct ipt_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV4; @@ -705,7 +706,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + IPT_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -775,7 +776,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, } static int -cleanup_entry(struct ipt_entry *e, unsigned int *i) +cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; struct ipt_entry_target *t; @@ -784,7 +785,7 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) return 1; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, NULL); + IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); par.target = t->u.kernel.target; @@ -866,7 +867,7 @@ translate_table(struct net *net, if (ret != 0) { IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, net, &i); return ret; } @@ -1260,7 +1261,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + net, NULL); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1320,7 +1321,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1682,7 +1683,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, return 0; cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + IPT_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -1782,7 +1783,7 @@ translate_compat_table(struct net *net, j -= i; COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, compat_release_entry, &j); - IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); xt_free_table_info(newinfo); return ret; } @@ -1853,7 +1854,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2112,7 +2113,7 @@ out: return ERR_PTR(ret); } -void ipt_unregister_table(struct xt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2122,7 +2123,7 @@ void ipt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index df566cbd68e5..dee90eb8aa47 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -138,7 +138,7 @@ static int __net_init iptable_filter_net_init(struct net *net) static void __net_exit iptable_filter_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_filter); + ipt_unregister_table(net, net->ipv4.iptable_filter); } static struct pernet_operations iptable_filter_net_ops = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index fae78c3076c4..e07bf242343a 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -208,7 +208,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) static void __net_exit iptable_mangle_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_mangle); + ipt_unregister_table(net, net->ipv4.iptable_mangle); } static struct pernet_operations iptable_mangle_net_ops = { diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 993edc23be09..40f2b9f611a2 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -100,7 +100,7 @@ static int __net_init iptable_raw_net_init(struct net *net) static void __net_exit iptable_raw_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_raw); + ipt_unregister_table(net, net->ipv4.iptable_raw); } static struct pernet_operations iptable_raw_net_ops = { diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 3bd3d6388da5..7ce2366e4305 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -138,7 +138,7 @@ static int __net_init iptable_security_net_init(struct net *net) static void __net_exit iptable_security_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.iptable_security); + ipt_unregister_table(net, net->ipv4.iptable_security); } static struct pernet_operations iptable_security_net_ops = { diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 9e81e0dfb4ec..85da34fdc755 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -195,7 +195,7 @@ static int __net_init nf_nat_rule_net_init(struct net *net) static void __net_exit nf_nat_rule_net_exit(struct net *net) { - ipt_unregister_table(net->ipv4.nat_table); + ipt_unregister_table(net, net->ipv4.nat_table); } static struct pernet_operations nf_nat_rule_net_ops = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a825940a92ef..9f1d45f2ba8f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -585,13 +585,14 @@ mark_source_chains(struct xt_table_info *newinfo, } static int -cleanup_match(struct ip6t_entry_match *m, unsigned int *i) +cleanup_match(struct ip6t_entry_match *m, struct net *net, unsigned int *i) { struct xt_mtdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.match = m->u.kernel.match; par.matchinfo = m->data; par.family = NFPROTO_IPV6; @@ -737,7 +738,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, err: module_put(t->u.kernel.target->me); cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } @@ -807,7 +808,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, } static int -cleanup_entry(struct ip6t_entry *e, unsigned int *i) +cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; struct ip6t_entry_target *t; @@ -816,7 +817,7 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) return 1; /* Cleanup all matches */ - IP6T_MATCH_ITERATE(e, cleanup_match, NULL); + IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ip6t_get_target(e); par.target = t->u.kernel.target; @@ -898,7 +899,7 @@ translate_table(struct net *net, if (ret != 0) { IP6T_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + cleanup_entry, net, &i); return ret; } @@ -1293,7 +1294,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, - NULL); + net, NULL); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, sizeof(struct xt_counters) * num_counters) != 0) @@ -1353,7 +1354,7 @@ do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1692,14 +1693,15 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr, return ret; } -static int compat_check_entry(struct ip6t_entry *e, const char *name, - unsigned int *i) +static int compat_check_entry(struct ip6t_entry *e, struct net *net, + const char *name, unsigned int *i) { unsigned int j; int ret; struct xt_mtchk_param mtpar; j = 0; + mtpar.net = net; mtpar.table = name; mtpar.entryinfo = &e->ipv6; mtpar.hook_mask = e->comefrom; @@ -1716,12 +1718,13 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name, return 0; cleanup_matches: - IP6T_MATCH_ITERATE(e, cleanup_match, &j); + IP6T_MATCH_ITERATE(e, cleanup_match, net, &j); return ret; } static int -translate_compat_table(const char *name, +translate_compat_table(struct net *net, + const char *name, unsigned int valid_hooks, struct xt_table_info **pinfo, void **pentry0, @@ -1810,12 +1813,12 @@ translate_compat_table(const char *name, i = 0; ret = IP6T_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry, - name, &i); + net, name, &i); if (ret) { j -= i; COMPAT_IP6T_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i, compat_release_entry, &j); - IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i); + IP6T_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, net, &i); xt_free_table_info(newinfo); return ret; } @@ -1870,7 +1873,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) goto free_newinfo; } - ret = translate_compat_table(tmp.name, tmp.valid_hooks, + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, &newinfo, &loc_cpu_entry, tmp.size, tmp.num_entries, tmp.hook_entry, tmp.underflow); @@ -1886,7 +1889,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return 0; free_newinfo_untrans: - IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, net, NULL); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -2144,7 +2147,7 @@ out: return ERR_PTR(ret); } -void ip6t_unregister_table(struct xt_table *table) +void ip6t_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -2154,7 +2157,7 @@ void ip6t_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, net, NULL); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ad378efd0eb8..33ddfe53e18d 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -131,7 +131,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) static void __net_exit ip6table_filter_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_filter); + ip6t_unregister_table(net, net->ipv6.ip6table_filter); } static struct pernet_operations ip6table_filter_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a929c19d30e3..9bc483f000e5 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -182,7 +182,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) static void __net_exit ip6table_mangle_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_mangle); + ip6t_unregister_table(net, net->ipv6.ip6table_mangle); } static struct pernet_operations ip6table_mangle_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index ed1a1180f3b3..4c90b552e433 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -94,7 +94,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) static void __net_exit ip6table_raw_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_raw); + ip6t_unregister_table(net, net->ipv6.ip6table_raw); } static struct pernet_operations ip6table_raw_net_ops = { diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 41b444c60934..baa8d4ef3b0a 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -134,7 +134,7 @@ static int __net_init ip6table_security_net_init(struct net *net) static void __net_exit ip6table_security_net_exit(struct net *net) { - ip6t_unregister_table(net->ipv6.ip6table_security); + ip6t_unregister_table(net, net->ipv6.ip6table_security); } static struct pernet_operations ip6table_security_net_ops = { -- cgit v1.2.3 From 7c070aa947d1a4105742378579c267f6e7fd08a1 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 20 Jan 2010 10:42:41 +0100 Subject: IPv6: reassembly: replace magic number with macro definitions Use macro to define high/low thresh value, refer to IPV6_FRAG_TIMEOUT. Signed-off-by: Shan Wei Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- include/net/ipv6.h | 2 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ccab5946c830..299bbf5adfb6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -246,6 +246,8 @@ extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); int ip6_frag_nqueues(struct net *net); int ip6_frag_mem(struct net *net); +#define IPV6_FRAG_HIGH_THRESH 262144 /* == 256*1024 */ +#define IPV6_FRAG_LOW_THRESH 196608 /* == 192*1024 */ #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ extern int __ipv6_addr_type(const struct in6_addr *addr); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1030ce1e6c79..744ea49de356 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -666,8 +666,8 @@ int nf_ct_frag6_init(void) nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = 256 * 1024; - nf_init_frags.low_thresh = 192 * 1024; + nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2cddea3bd6be..15bb122e1ce4 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -747,8 +747,8 @@ static inline void ip6_frags_sysctl_unregister(void) static int ipv6_frags_init_net(struct net *net) { - net->ipv6.frags.high_thresh = 256 * 1024; - net->ipv6.frags.low_thresh = 192 * 1024; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; inet_frags_init_net(&net->ipv6.frags); -- cgit v1.2.3 From c30f540b63047437ffa894b5353216410c480d1a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 2 Feb 2010 15:03:24 +0100 Subject: netfilter: xtables: CONFIG_COMPAT redux Ifdef out struct nf_sockopt_ops::compat_set struct nf_sockopt_ops::compat_get struct xt_match::compat_from_user struct xt_match::compat_to_user struct xt_match::compatsize to make structures smaller on COMPAT=n kernels. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter.h | 9 ++++++--- include/linux/netfilter/x_tables.h | 12 ++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 48c54960773c..78f33d223680 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -114,15 +114,17 @@ struct nf_sockopt_ops { int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, void __user *user, unsigned int len); +#ifdef CONFIG_COMPAT int (*compat_set)(struct sock *sk, int optval, void __user *user, unsigned int len); - +#endif int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); +#ifdef CONFIG_COMPAT int (*compat_get)(struct sock *sk, int optval, void __user *user, int *len); - +#endif /* Use the module struct to lock set/get code in place */ struct module *owner; }; @@ -222,11 +224,12 @@ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); - +#ifdef CONFIG_COMPAT int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, unsigned int len); int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); +#endif /* Call this before modifying an existing packet: ensures it is modifiable and linear to the point you care about (writable_len). diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 3caf5e151102..026eb78ee83c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -283,11 +283,11 @@ struct xt_match { /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_mtdtor_param *); - +#ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); int (*compat_to_user)(void __user *dst, void *src); - +#endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -296,7 +296,9 @@ struct xt_match { const char *table; unsigned int matchsize; +#ifdef CONFIG_COMPAT unsigned int compatsize; +#endif unsigned int hooks; unsigned short proto; @@ -323,17 +325,19 @@ struct xt_target { /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); - +#ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, void *src); int (*compat_to_user)(void __user *dst, void *src); - +#endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int targetsize; +#ifdef CONFIG_COMPAT unsigned int compatsize; +#endif unsigned int hooks; unsigned short proto; -- cgit v1.2.3 From 794e68716bab578ae8f8912dc934496d7c7abc90 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:41:29 +0100 Subject: netfilter: ctnetlink: only assign helpers for matching protocols Make sure not to assign a helper for a different network or transport layer protocol to a connection. Additionally change expectation deletion by helper to compare the name directly - there might be multiple helper registrations using the same name, currently one of them is chosen in an unpredictable manner and only those expectations are removed. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_helper.h | 2 +- net/netfilter/nf_conntrack_helper.c | 8 +++++--- net/netfilter/nf_conntrack_netlink.c | 23 +++++++++++------------ 3 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index d015de92e03f..86be7c4816d6 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -40,7 +40,7 @@ struct nf_conntrack_helper { }; extern struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name); +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..c0e461f466ae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -65,7 +65,7 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } struct nf_conntrack_helper * -__nf_conntrack_helper_find_byname(const char *name) +__nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct hlist_node *n; @@ -73,13 +73,15 @@ __nf_conntrack_helper_find_byname(const char *name) for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, n, &nf_ct_helper_hash[i], hnode) { - if (!strcmp(h->name, name)) + if (!strcmp(h->name, name) && + h->tuple.src.l3num == l3num && + h->tuple.dst.protonum == protonum) return h; } } return NULL; } -EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find_byname); +EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 79478dfba27e..16f86d61e5d1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1001,7 +1001,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) return 0; } - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { #ifdef CONFIG_MODULES spin_unlock_bh(&nf_conntrack_lock); @@ -1012,7 +1013,8 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } spin_lock_bh(&nf_conntrack_lock); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) return -EAGAIN; #endif @@ -1211,7 +1213,8 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper == NULL) { rcu_read_unlock(); #ifdef CONFIG_MODULES @@ -1221,7 +1224,9 @@ ctnetlink_create_conntrack(struct net *net, } rcu_read_lock(); - helper = __nf_conntrack_helper_find_byname(helpname); + helper = __nf_conntrack_helper_find(helpname, + nf_ct_l3num(ct), + nf_ct_protonum(ct)); if (helper) { err = -EAGAIN; goto err2; @@ -1714,7 +1719,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct net *net = sock_net(ctnl); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; - struct nf_conntrack_helper *h; struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; @@ -1751,18 +1755,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* delete all expectations for this helper */ spin_lock_bh(&nf_conntrack_lock); - h = __nf_conntrack_helper_find_byname(name); - if (!h) { - spin_unlock_bh(&nf_conntrack_lock); - return -EOPNOTSUPP; - } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, &net->ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); - if (m_help->helper == h - && del_timer(&exp->timeout)) { + if (!strcmp(m_help->helper->name, name) && + del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); nf_ct_expect_put(exp); } -- cgit v1.2.3 From add67461240c1dadc7c8d97e66f8f92b556ca523 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:45:12 +0100 Subject: netfilter: add struct net * to target parameters Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 ++ net/bridge/netfilter/ebtables.c | 10 ++++++---- net/ipv4/netfilter/ip_tables.c | 8 +++++--- net/ipv6/netfilter/ip6_tables.c | 8 +++++--- 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 026eb78ee83c..365fabe1b16e 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -249,6 +249,7 @@ struct xt_target_param { * Other fields see above. */ struct xt_tgchk_param { + struct net *net; const char *table; const void *entryinfo; const struct xt_target *target; @@ -259,6 +260,7 @@ struct xt_tgchk_param { /* Target destructor parameters */ struct xt_tgdtor_param { + struct net *net; const struct xt_target *target; void *targinfo; u_int8_t family; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 1aa0e4c1f52d..12beb580aa21 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -579,13 +579,14 @@ ebt_cleanup_match(struct ebt_entry_match *m, struct net *net, unsigned int *i) } static inline int -ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) +ebt_cleanup_watcher(struct ebt_entry_watcher *w, struct net *net, unsigned int *i) { struct xt_tgdtor_param par; if (i && (*i)-- == 0) return 1; + par.net = net; par.target = w->u.watcher; par.targinfo = w->data; par.family = NFPROTO_BRIDGE; @@ -606,10 +607,11 @@ ebt_cleanup_entry(struct ebt_entry *e, struct net *net, unsigned int *cnt) /* we're done */ if (cnt && (*cnt)-- == 0) return 1; - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); + par.net = net; par.target = t->u.target; par.targinfo = t->data; par.family = NFPROTO_BRIDGE; @@ -674,7 +676,7 @@ ebt_check_entry(struct ebt_entry *e, } i = 0; - mtpar.net = net; + mtpar.net = tgpar.net = net; mtpar.table = tgpar.table = name; mtpar.entryinfo = tgpar.entryinfo = e; mtpar.hook_mask = tgpar.hook_mask = hookmask; @@ -730,7 +732,7 @@ ebt_check_entry(struct ebt_entry *e, (*cnt)++; return 0; cleanup_watchers: - EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, &j); + EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, net, &j); cleanup_matches: EBT_MATCH_ITERATE(e, ebt_cleanup_match, net, &i); return ret; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index cfaba0e2e6fc..7fde8f6950d8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -638,10 +638,11 @@ err: return ret; } -static int check_target(struct ipt_entry *e, const char *name) +static int check_target(struct ipt_entry *e, struct net *net, const char *name) { struct ipt_entry_target *t = ipt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -697,7 +698,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; @@ -788,6 +789,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ipt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV4; @@ -1675,7 +1677,7 @@ compat_check_entry(struct ipt_entry *e, struct net *net, const char *name, if (ret) goto cleanup_matches; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 9f1d45f2ba8f..0376ed6d5594 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -669,10 +669,11 @@ err: return ret; } -static int check_target(struct ip6t_entry *e, const char *name) +static int check_target(struct ip6t_entry *e, struct net *net, const char *name) { struct ip6t_entry_target *t = ip6t_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -729,7 +730,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; @@ -820,6 +821,7 @@ cleanup_entry(struct ip6t_entry *e, struct net *net, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, net, NULL); t = ip6t_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_IPV6; @@ -1710,7 +1712,7 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, if (ret) goto cleanup_matches; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto cleanup_matches; -- cgit v1.2.3 From 858b31330054a9ad259feceea0ad1ce5385c47f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:48:53 +0100 Subject: netfilter: nf_conntrack: split up IPCT_STATUS event Split up the IPCT_STATUS event into an IPCT_REPLY event, which is generated when the IPS_SEEN_REPLY bit is set, and an IPCT_ASSURED event, which is generated when the IPS_ASSURED bit is set. In combination with a following patch to support selective event delivery, this can be used for "sparse" conntrack replication: start replicating the conntrack entry after it reached the ASSURED state and that way it's SYN-flood resistant. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_ecache.h | 21 +++++++++++---------- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 6 ++++-- net/netfilter/nf_conntrack_proto_gre.c | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 2 +- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 8 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 475facc3051a..5e05fb883ab1 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -14,19 +14,20 @@ /* Connection tracking event types */ enum ip_conntrack_events { - IPCT_NEW = 0, /* new conntrack */ - IPCT_RELATED = 1, /* related conntrack */ - IPCT_DESTROY = 2, /* destroyed conntrack */ - IPCT_STATUS = 3, /* status has changed */ - IPCT_PROTOINFO = 4, /* protocol information has changed */ - IPCT_HELPER = 5, /* new helper has been set */ - IPCT_MARK = 6, /* new mark has been set */ - IPCT_NATSEQADJ = 7, /* NAT is doing sequence adjustment */ - IPCT_SECMARK = 8, /* new security mark has been set */ + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ }; enum ip_conntrack_expect_events { - IPEXP_NEW = 0, /* new expectation */ + IPEXP_NEW, /* new expectation */ }; struct nf_conntrack_ecache { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0e98c3282d42..091ff770eb7b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -825,7 +825,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_REPLY, ct); return ret; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 16f86d61e5d1..ff594eb138c1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1371,7 +1371,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | @@ -1396,7 +1397,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + nf_conntrack_eventmask_report((1 << IPCT_REPLY) | + (1 << IPCT_ASSURED) | (1 << IPCT_HELPER) | (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index c99cfba64ddc..d899b1a69940 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -241,7 +241,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index f9d930f80276..b68ff15ed979 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -377,7 +377,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3c96437b45ad..ad118053971a 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1045,7 +1045,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 5c5518bedb4b..8d38f9a4bed8 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -77,7 +77,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 458655bb2106..0b1bc9ba6678 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, ct); + nf_conntrack_event_cache(IPCT_ASSURED, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); -- cgit v1.2.3 From 0cebe4b4163b6373c9d24c1a192939777bc27e55 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 13:51:51 +0100 Subject: netfilter: ctnetlink: support selective event delivery Add two masks for conntrack end expectation events to struct nf_conntrack_ecache and use them to filter events. Their default value is "all events" when the event sysctl is on and "no events" when it is off. A following patch will add specific initializations. Expectation events depend on the ecache struct of their master conntrack. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 18 ++++++++ include/net/netfilter/nf_conntrack_ecache.h | 59 +++++++++++++-------------- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- 4 files changed, 48 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index a374787ed9b0..ebfed90733f7 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -74,6 +74,24 @@ enum ip_conntrack_status { IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), }; +/* Connection tracking event types */ +enum ip_conntrack_events { + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW, /* new expectation */ +}; + #ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 5e05fb883ab1..96ba5f7dcab6 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,28 +12,12 @@ #include #include -/* Connection tracking event types */ -enum ip_conntrack_events { - IPCT_NEW, /* new conntrack */ - IPCT_RELATED, /* related conntrack */ - IPCT_DESTROY, /* destroyed conntrack */ - IPCT_REPLY, /* connection has seen two-way traffic */ - IPCT_ASSURED, /* connection status has changed to assured */ - IPCT_PROTOINFO, /* protocol information has changed */ - IPCT_HELPER, /* new helper has been set */ - IPCT_MARK, /* new mark has been set */ - IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ - IPCT_SECMARK, /* new security mark has been set */ -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW, /* new expectation */ -}; - struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u32 pid; /* netlink pid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 pid; /* netlink pid of destroyer */ }; static inline struct nf_conntrack_ecache * @@ -43,14 +27,24 @@ nf_ct_ecache_find(const struct nf_conn *ct) } static inline struct nf_conntrack_ecache * -nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp) +nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) { struct net *net = nf_ct_net(ct); + struct nf_conntrack_ecache *e; - if (!net->ct.sysctl_events) + if (!ctmask && !expmask && net->ct.sysctl_events) { + ctmask = ~0; + expmask = ~0; + } + if (!ctmask && !expmask) return NULL; - return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + e = nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp); + if (e) { + e->ctmask = ctmask; + e->expmask = expmask; + } + return e; }; #ifdef CONFIG_NF_CONNTRACK_EVENTS @@ -83,6 +77,9 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) if (e == NULL) return; + if (!(e->ctmask & (1 << event))) + return; + set_bit(event, &e->cache); } @@ -93,7 +90,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, int report) { int ret = 0; - struct net *net = nf_ct_net(ct); struct nf_ct_event_notifier *notify; struct nf_conntrack_ecache *e; @@ -102,9 +98,6 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) - goto out_unlock; - e = nf_ct_ecache_find(ct); if (e == NULL) goto out_unlock; @@ -118,6 +111,9 @@ nf_conntrack_eventmask_report(unsigned int eventmask, /* This is a resent of a destroy event? If so, skip missed */ unsigned long missed = e->pid ? 0 : e->missed; + if (!((eventmask | missed) & e->ctmask)) + goto out_unlock; + ret = notify->fcn(eventmask | missed, &item); if (unlikely(ret < 0 || missed)) { spin_lock_bh(&ct->lock); @@ -173,18 +169,19 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, u32 pid, int report) { - struct net *net = nf_ct_exp_net(exp); struct nf_exp_event_notifier *notify; + struct nf_conntrack_ecache *e; rcu_read_lock(); notify = rcu_dereference(nf_expect_event_cb); if (notify == NULL) goto out_unlock; - if (!net->ct.sysctl_events) + e = nf_ct_ecache_find(exp->master); + if (e == NULL) goto out_unlock; - { + if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, .pid = pid, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 091ff770eb7b..53b8da6ad6b7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -648,7 +648,7 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ff594eb138c1..f5c0b09e12f1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1281,7 +1281,7 @@ ctnetlink_create_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) -- cgit v1.2.3 From b2a15a604d379af323645e330638e2cfcc696aff Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 14:13:03 +0100 Subject: netfilter: nf_conntrack: support conntrack templates Support initializing selected parameters of new conntrack entries from a "conntrack template", which is a specially marked conntrack entry attached to the skb. Currently the helper and the event delivery masks can be initialized this way. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 4 +++ include/net/netfilter/nf_conntrack.h | 5 +++ include/net/netfilter/nf_conntrack_helper.h | 3 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/netfilter/nf_conntrack_core.c | 50 +++++++++++++++++--------- net/netfilter/nf_conntrack_helper.c | 17 ++++++--- net/netfilter/nf_conntrack_netlink.c | 2 +- 8 files changed, 61 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index ebfed90733f7..c608677dda60 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -72,6 +72,10 @@ enum ip_conntrack_status { /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), + + /* Conntrack is a template */ + IPS_TEMPLATE_BIT = 11, + IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), }; /* Connection tracking event types */ diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a0904adfb8f7..5043d61c99a7 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -272,6 +272,11 @@ nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp); +static inline int nf_ct_is_template(const struct nf_conn *ct) +{ + return test_bit(IPS_TEMPLATE_BIT, &ct->status); +} + /* It's confirmed if it is, or has been in the hash table. */ static inline int nf_ct_is_confirmed(struct nf_conn *ct) { diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 86be7c4816d6..e17aaa3e19fd 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -47,7 +47,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); -extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags); extern void nf_ct_helper_destroy(struct nf_conn *ct); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 331ead3ebd1b..77627fa80561 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -59,7 +59,7 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; #endif #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 0956ebabbff2..55ce22e5de49 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -212,7 +212,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if (skb->nfct) + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) return NF_ACCEPT; reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 53b8da6ad6b7..471e2a79d26f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(struct net *net, +init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, @@ -628,6 +628,7 @@ init_conntrack(struct net *net, struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; + struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { @@ -648,7 +649,11 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); - nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); + + ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; + nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, + ecache ? ecache->expmask : 0, + GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); @@ -673,7 +678,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -694,7 +699,7 @@ init_conntrack(struct net *net, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct net *net, +resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, @@ -718,7 +723,8 @@ resolve_normal_ct(struct net *net, /* look for tuple match */ h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, + skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -755,7 +761,7 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { - struct nf_conn *ct; + struct nf_conn *ct, *tmpl = NULL; enum ip_conntrack_info ctinfo; struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -764,10 +770,14 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, int set_reply = 0; int ret; - /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(net, ignore); - return NF_ACCEPT; + /* Previously seen (loopback or untracked)? Ignore. */ + tmpl = (struct nf_conn *)skb->nfct; + if (!nf_ct_is_template(tmpl)) { + NF_CT_STAT_INC_ATOMIC(net, ignore); + return NF_ACCEPT; + } + skb->nfct = NULL; } /* rcu_read_lock()ed by nf_hook_slow */ @@ -778,7 +788,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, pr_debug("not prepared to track yet or error occured\n"); NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } l4proto = __nf_ct_l4proto_find(pf, protonum); @@ -791,22 +802,25 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); - return -ret; + ret = -ret; + goto out; } } - ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ NF_CT_STAT_INC_ATOMIC(net, invalid); - return NF_ACCEPT; + ret = NF_ACCEPT; + goto out; } if (IS_ERR(ct)) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(net, drop); - return NF_DROP; + ret = NF_DROP; + goto out; } NF_CT_ASSERT(skb->nfct); @@ -821,11 +835,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_STAT_INC_ATOMIC(net, invalid); if (ret == -NF_DROP) NF_CT_STAT_INC_ATOMIC(net, drop); - return -ret; + ret = -ret; + goto out; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_REPLY, ct); +out: + if (tmpl) + nf_ct_put(tmpl); return ret; } @@ -864,7 +882,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index c0e461f466ae..8144b0da5515 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -96,13 +96,22 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, + gfp_t flags) { + struct nf_conntrack_helper *helper = NULL; + struct nf_conn_help *help; int ret = 0; - struct nf_conntrack_helper *helper; - struct nf_conn_help *help = nfct_help(ct); - helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (tmpl != NULL) { + help = nfct_help(tmpl); + if (help != NULL) + helper = help->helper; + } + + help = nfct_help(ct); + if (helper == NULL) + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); if (helper == NULL) { if (help) rcu_assign_pointer(help->helper, NULL); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f5c0b09e12f1..09044f9f4b2e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1249,7 +1249,7 @@ ctnetlink_create_conntrack(struct net *net, } } else { /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); if (err < 0) goto err2; } -- cgit v1.2.3 From 84f3bb9ae9db90f7fb15d98b55279a58ab1b2363 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 3 Feb 2010 17:17:06 +0100 Subject: netfilter: xtables: add CT target Add a new target for the raw table, which can be used to specify conntrack parameters for specific connections, f.i. the conntrack helper. The target attaches a "template" connection tracking entry to the skb, which is used by the conntrack core when initializing a new conntrack. Signed-off-by: Patrick McHardy --- include/linux/netfilter/Kbuild | 1 + include/linux/netfilter/xt_CT.h | 17 +++ include/net/netfilter/nf_conntrack_helper.h | 3 + net/netfilter/Kconfig | 12 +++ net/netfilter/Makefile | 1 + net/netfilter/nf_conntrack_helper.c | 19 ++++ net/netfilter/xt_CT.c | 158 ++++++++++++++++++++++++++++ 7 files changed, 211 insertions(+) create mode 100644 include/linux/netfilter/xt_CT.h create mode 100644 net/netfilter/xt_CT.c (limited to 'include') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 2aea50399c0b..a5a63e41b8af 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -6,6 +6,7 @@ header-y += nfnetlink_queue.h header-y += xt_CLASSIFY.h header-y += xt_CONNMARK.h header-y += xt_CONNSECMARK.h +header-y += xt_CT.h header-y += xt_DSCP.h header-y += xt_LED.h header-y += xt_MARK.h diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h new file mode 100644 index 000000000000..7fd0effe1316 --- /dev/null +++ b/include/linux/netfilter/xt_CT.h @@ -0,0 +1,17 @@ +#ifndef _XT_CT_H +#define _XT_CT_H + +#define XT_CT_NOTRACK 0x1 + +struct xt_ct_target_info { + u_int16_t flags; + u_int16_t __unused; + u_int32_t ct_events; + u_int32_t exp_events; + char helper[16]; + + /* Used internally by the kernel */ + struct nf_conn *ct __attribute__((aligned(8))); +}; + +#endif /* _XT_CT_H */ diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index e17aaa3e19fd..32c305dbdab6 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -42,6 +42,9 @@ struct nf_conntrack_helper { extern struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum); +extern struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); + extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 634d14affc8d..4469d45261f4 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -341,6 +341,18 @@ config NETFILTER_XT_TARGET_CONNSECMARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_CT + tristate '"CT" target support' + depends on NF_CONNTRACK + depends on IP_NF_RAW || IP6_NF_RAW + depends on NETFILTER_ADVANCED + help + This options adds a `CT' target, which allows to specify initial + connection tracking parameters like events to be delivered and + the helper to be used. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' depends on IP_NF_MANGLE || IP6_NF_MANGLE diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 49f62ee4e9ff..f873644f02f6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8144b0da5515..a74a5769877b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -83,6 +83,25 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); +struct nf_conntrack_helper * +nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) +{ + struct nf_conntrack_helper *h; + + h = __nf_conntrack_helper_find(name, l3num, protonum); +#ifdef CONFIG_MODULES + if (h == NULL) { + if (request_module("nfct-helper-%s", name) == 0) + h = __nf_conntrack_helper_find(name, l3num, protonum); + } +#endif + if (h != NULL && !try_module_get(h->me)) + h = NULL; + + return h; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c new file mode 100644 index 000000000000..8183a054256f --- /dev/null +++ b/net/netfilter/xt_CT.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2010 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int xt_ct_target(struct sk_buff *skb, + const struct xt_target_param *par) +{ + const struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + + /* Previously seen (loopback)? Ignore. */ + if (skb->nfct != NULL) + return XT_CONTINUE; + + atomic_inc(&ct->ct_general.use); + skb->nfct = &ct->ct_general; + skb->nfctinfo = IP_CT_NEW; + + return XT_CONTINUE; +} + +static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) +{ + if (par->family == AF_INET) { + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.invflags & IPT_INV_PROTO) + return 0; + return e->ip.proto; + } else if (par->family == AF_INET6) { + const struct ip6t_entry *e = par->entryinfo; + + if (e->ipv6.invflags & IP6T_INV_PROTO) + return 0; + return e->ipv6.proto; + } else + return 0; +} + +static bool xt_ct_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conntrack_tuple t; + struct nf_conn_help *help; + struct nf_conn *ct; + u8 proto; + + if (info->flags & ~XT_CT_NOTRACK) + return false; + + if (info->flags & XT_CT_NOTRACK) { + ct = &nf_conntrack_untracked; + atomic_inc(&ct->ct_general.use); + goto out; + } + + if (nf_ct_l3proto_try_module_get(par->family) < 0) + goto err1; + + memset(&t, 0, sizeof(t)); + ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + if (IS_ERR(ct)) + goto err2; + + if ((info->ct_events || info->exp_events) && + !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, + GFP_KERNEL)) + goto err3; + + if (info->helper[0]) { + proto = xt_ct_find_proto(par); + if (!proto) + goto err3; + + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (help->helper == NULL) + goto err3; + } + + __set_bit(IPS_TEMPLATE_BIT, &ct->status); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); +out: + info->ct = ct; + return true; + +err3: + nf_conntrack_free(ct); +err2: + nf_ct_l3proto_module_put(par->family); +err1: + return false; +} + +static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct xt_ct_target_info *info = par->targinfo; + struct nf_conn *ct = info->ct; + struct nf_conn_help *help; + + if (ct != &nf_conntrack_untracked) { + help = nfct_help(ct); + if (help) + module_put(help->helper->me); + + nf_ct_l3proto_module_put(par->family); + } + nf_ct_put(info->ct); +} + +static struct xt_target xt_ct_tg __read_mostly = { + .name = "CT", + .family = NFPROTO_UNSPEC, + .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), + .checkentry = xt_ct_tg_check, + .destroy = xt_ct_tg_destroy, + .target = xt_ct_target, + .table = "raw", + .me = THIS_MODULE, +}; + +static int __init xt_ct_tg_init(void) +{ + return xt_register_target(&xt_ct_tg); +} + +static void __exit xt_ct_tg_exit(void) +{ + xt_unregister_target(&xt_ct_tg); +} + +module_init(xt_ct_tg_init); +module_exit(xt_ct_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: connection tracking target"); +MODULE_ALIAS("ipt_CT"); +MODULE_ALIAS("ip6t_CT"); -- cgit v1.2.3 From 0a02604628c49037e1de2091d75462fd856b26ed Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 10 Feb 2010 15:00:32 +0100 Subject: netfilter: xtables: consistent struct compat_xt_counters definition There is compat_u64 type which deals with different u64 type alignment on different compat-capable platforms, so use it and removed some hardcoded assumptions. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 365fabe1b16e..8b6c0e7d2657 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -562,11 +562,7 @@ struct compat_xt_entry_target { * current task alignment */ struct compat_xt_counters { -#if defined(CONFIG_X86_64) || defined(CONFIG_IA64) - u_int32_t cnt[4]; -#else - u_int64_t cnt[2]; -#endif + compat_u64 pcnt, bcnt; /* Packet and byte counters */ }; struct compat_xt_counters_info { -- cgit v1.2.3 From 42107f5009da223daa800d6da6904d77297ae829 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 10 Feb 2010 15:03:27 +0100 Subject: netfilter: xtables: symmetric COMPAT_XT_ALIGN definition Rewrite COMPAT_XT_ALIGN in terms of dummy structure hack. Compat counters logically have nothing to do with it. Use ALIGN() macro while I'm at it for same types. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 13 +++++++++---- net/netfilter/x_tables.c | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8b6c0e7d2657..9d671ebf0605 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -93,8 +93,7 @@ struct _xt_align { __u64 u64; }; -#define XT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) \ - & ~(__alignof__(struct _xt_align)-1)) +#define XT_ALIGN(s) ALIGN((s), __alignof__(struct _xt_align)) /* Standard return verdict, or do jump. */ #define XT_STANDARD_TARGET "" @@ -571,8 +570,14 @@ struct compat_xt_counters_info { struct compat_xt_counters counters[0]; }; -#define COMPAT_XT_ALIGN(s) (((s) + (__alignof__(struct compat_xt_counters)-1)) \ - & ~(__alignof__(struct compat_xt_counters)-1)) +struct _compat_xt_align { + __u8 u8; + __u16 u16; + __u32 u32; + compat_u64 u64; +}; + +#define COMPAT_XT_ALIGN(s) ALIGN((s), __alignof__(struct _compat_xt_align)) extern void xt_compat_lock(u_int8_t af); extern void xt_compat_unlock(u_int8_t af); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f01955cce314..5c564ff10a3b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -364,7 +364,7 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ - pr_err("%s_tables: %s match: invalid size %Zu != %u\n", + pr_err("%s_tables: %s match: invalid size %u != %u\n", xt_prefix[par->family], par->match->name, XT_ALIGN(par->match->matchsize), size); return -EINVAL; @@ -514,7 +514,7 @@ int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s target: invalid size %Zu != %u\n", + pr_err("%s_tables: %s target: invalid size %u != %u\n", xt_prefix[par->family], par->target->name, XT_ALIGN(par->target->targetsize), size); return -EINVAL; -- cgit v1.2.3 From 2b95efe7f6bb750256a702cc32d33b0cb2cd8223 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 17 Jun 2009 13:57:48 +0200 Subject: netfilter: xtables: use xt_table for hook instantiation The respective xt_table structures already have most of the metadata needed for hook setup. Add a 'priority' field to struct xt_table so that xt_hook_link() can be called with a reduced number of arguments. So should we be having more tables in the future, it comes at no static cost (only runtime, as before) - space saved: 6807373->6806555. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 4 +++ net/ipv4/netfilter/arptable_filter.c | 33 +++++---------------- net/ipv4/netfilter/iptable_filter.c | 33 +++++---------------- net/ipv4/netfilter/iptable_mangle.c | 47 +++++------------------------ net/ipv4/netfilter/iptable_raw.c | 27 +++++------------ net/ipv4/netfilter/iptable_security.c | 33 +++++---------------- net/ipv6/netfilter/ip6table_filter.c | 33 +++++---------------- net/ipv6/netfilter/ip6table_mangle.c | 48 +++++------------------------- net/ipv6/netfilter/ip6table_raw.c | 26 +++++----------- net/ipv6/netfilter/ip6table_security.c | 33 +++++---------------- net/netfilter/x_tables.c | 54 ++++++++++++++++++++++++++++++++++ 11 files changed, 121 insertions(+), 250 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 365fabe1b16e..fdd3342c1235 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -361,6 +361,7 @@ struct xt_table { struct module *me; u_int8_t af; /* address/protocol family */ + int priority; /* hook order */ /* A unique name... */ const char name[XT_TABLE_MAXNAMELEN]; @@ -522,6 +523,9 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } +extern struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); +extern void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); + #ifdef CONFIG_COMPAT #include diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index deeda9b2cf05..b361de0dac4c 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -50,6 +50,7 @@ static const struct xt_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_ARP, + .priority = NF_IP_PRI_FILTER, }; /* The work comes in here from netfilter.c */ @@ -63,29 +64,7 @@ arptable_filter_hook(unsigned int hook, struct sk_buff *skb, return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); } -static struct nf_hook_ops arpt_ops[] __read_mostly = { - { - .hook = arptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = arptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_OUT, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = arptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_ARP, - .hooknum = NF_ARP_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { @@ -115,9 +94,11 @@ static int __init arptable_filter_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); - if (ret < 0) + arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) { + ret = PTR_ERR(arpfilter_ops); goto cleanup_table; + } return ret; cleanup_table: @@ -127,7 +108,7 @@ cleanup_table: static void __exit arptable_filter_fini(void) { - nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops)); + xt_hook_unlink(&packet_filter, arpfilter_ops); unregister_pernet_subsys(&arptable_filter_net_ops); } diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1bfeaae6f624..c14bb85db1d9 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -58,6 +58,7 @@ static const struct xt_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_FILTER, }; static unsigned int @@ -77,29 +78,7 @@ iptable_filter_hook(unsigned int hook, struct sk_buff *skb, return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = iptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = iptable_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static int forward = NF_ACCEPT; @@ -142,9 +121,11 @@ static int __init iptable_filter_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); goto cleanup_table; + } return ret; @@ -155,7 +136,7 @@ static int __init iptable_filter_init(void) static void __exit iptable_filter_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&iptable_filter_net_ops); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 4e699cd275c6..2355a229f8ee 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -69,6 +69,7 @@ static const struct xt_table packet_mangler = { .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_MANGLE, }; static unsigned int @@ -129,43 +130,7 @@ iptable_mangle_hook(unsigned int hook, dev_net(in)->ipv4.iptable_mangle); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = iptable_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, -}; +static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { @@ -196,9 +161,11 @@ static int __init iptable_mangle_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); goto cleanup_table; + } return ret; @@ -209,7 +176,7 @@ static int __init iptable_mangle_init(void) static void __exit iptable_mangle_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&iptable_mangle_net_ops); } diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index d16e43777c31..62a99154f14c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -41,6 +41,7 @@ static const struct xt_table packet_raw = { .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW, }; /* The work comes in here from netfilter.c. */ @@ -61,23 +62,7 @@ iptable_raw_hook(unsigned int hook, struct sk_buff *skb, return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); } -/* 'raw' is the very first table. */ -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_raw_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, - { - .hook = iptable_raw_hook, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, -}; +static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { @@ -108,9 +93,11 @@ static int __init iptable_raw_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); goto cleanup_table; + } return ret; @@ -121,7 +108,7 @@ static int __init iptable_raw_init(void) static void __exit iptable_raw_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&iptable_raw_net_ops); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 324505aaaa73..b1bf3ca2c6c7 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -62,6 +62,7 @@ static const struct xt_table security_table = { .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_SECURITY, }; static unsigned int @@ -82,29 +83,7 @@ iptable_security_hook(unsigned int hook, struct sk_buff *skb, return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); } -static struct nf_hook_ops ipt_ops[] __read_mostly = { - { - .hook = iptable_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP_PRI_SECURITY, - }, - { - .hook = iptable_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP_PRI_SECURITY, - }, - { - .hook = iptable_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV4, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_SECURITY, - }, -}; +static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { @@ -135,9 +114,11 @@ static int __init iptable_security_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); - if (ret < 0) + sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); goto cleanup_table; + } return ret; @@ -148,7 +129,7 @@ cleanup_table: static void __exit iptable_security_fini(void) { - nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&iptable_security_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 866f34ae236b..6e95d0614ca9 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -56,6 +56,7 @@ static const struct xt_table packet_filter = { .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_FILTER, }; /* The work comes in here from netfilter.c. */ @@ -69,29 +70,7 @@ ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_FILTER, - }, - { - .hook = ip6table_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP6_PRI_FILTER, - }, - { - .hook = ip6table_filter_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_FILTER, - }, -}; +static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static int forward = NF_ACCEPT; @@ -134,9 +113,11 @@ static int __init ip6table_filter_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); goto cleanup_table; + } return ret; @@ -147,7 +128,7 @@ static int __init ip6table_filter_init(void) static void __exit ip6table_filter_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&packet_filter, filter_ops); unregister_pernet_subsys(&ip6table_filter_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 405ac1f76390..5023ac52ffec 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -62,6 +62,7 @@ static const struct xt_table packet_mangler = { .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_MANGLE, }; static unsigned int @@ -122,44 +123,7 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, dev_net(in)->ipv6.ip6table_mangle); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_MANGLE, - }, - { - .hook = ip6table_mangle_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_MANGLE, - }, -}; - +static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init ip6table_mangle_net_init(struct net *net) { /* Register table */ @@ -189,9 +153,11 @@ static int __init ip6table_mangle_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); goto cleanup_table; + } return ret; @@ -202,7 +168,7 @@ static int __init ip6table_mangle_init(void) static void __exit ip6table_mangle_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&packet_mangler, mangle_ops); unregister_pernet_subsys(&ip6table_mangle_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5451a36fbc21..3bfa69511641 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -40,6 +40,7 @@ static const struct xt_table packet_raw = { .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_FIRST, }; /* The work comes in here from netfilter.c. */ @@ -53,22 +54,7 @@ ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_raw_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_FIRST, - .owner = THIS_MODULE, - }, - { - .hook = ip6table_raw_hook, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_FIRST, - .owner = THIS_MODULE, - }, -}; +static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_net_init(struct net *net) { @@ -99,9 +85,11 @@ static int __init ip6table_raw_init(void) return ret; /* Register hooks */ - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); goto cleanup_table; + } return ret; @@ -112,7 +100,7 @@ static int __init ip6table_raw_init(void) static void __exit ip6table_raw_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&packet_raw, rawtable_ops); unregister_pernet_subsys(&ip6table_raw_net_ops); } diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 841ea77f5218..dd2200f17a6c 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -61,6 +61,7 @@ static const struct xt_table security_table = { .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, + .priority = NF_IP6_PRI_SECURITY, }; static unsigned int @@ -74,29 +75,7 @@ ip6table_security_hook(unsigned int hook, struct sk_buff *skb, return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); } -static struct nf_hook_ops ip6t_ops[] __read_mostly = { - { - .hook = ip6table_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_IN, - .priority = NF_IP6_PRI_SECURITY, - }, - { - .hook = ip6table_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_FORWARD, - .priority = NF_IP6_PRI_SECURITY, - }, - { - .hook = ip6table_security_hook, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_SECURITY, - }, -}; +static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init ip6table_security_net_init(struct net *net) { @@ -127,9 +106,11 @@ static int __init ip6table_security_init(void) if (ret < 0) return ret; - ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); - if (ret < 0) + sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); goto cleanup_table; + } return ret; @@ -140,7 +121,7 @@ cleanup_table: static void __exit ip6table_security_fini(void) { - nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + xt_hook_unlink(&security_table, sectbl_ops); unregister_pernet_subsys(&ip6table_security_net_ops); } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index f01955cce314..b51cb0d7234a 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1091,6 +1091,60 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ +/** + * xt_hook_link - set up hooks for a new table + * @table: table with metadata needed to set up hooks + * @fn: Hook function + * + * This function will take care of creating and registering the necessary + * Netfilter hooks for XT tables. + */ +struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) +{ + unsigned int hook_mask = table->valid_hooks; + uint8_t i, num_hooks = hweight32(hook_mask); + uint8_t hooknum; + struct nf_hook_ops *ops; + int ret; + + ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + if (ops == NULL) + return ERR_PTR(-ENOMEM); + + for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0; + hook_mask >>= 1, ++hooknum) { + if (!(hook_mask & 1)) + continue; + ops[i].hook = fn; + ops[i].owner = table->me; + ops[i].pf = table->af; + ops[i].hooknum = hooknum; + ops[i].priority = table->priority; + ++i; + } + + ret = nf_register_hooks(ops, num_hooks); + if (ret < 0) { + kfree(ops); + return ERR_PTR(ret); + } + + return ops; +} +EXPORT_SYMBOL_GPL(xt_hook_link); + +/** + * xt_hook_unlink - remove hooks for a table + * @ops: nf_hook_ops array as returned by nf_hook_link + * @hook_mask: the very same mask that was passed to nf_hook_link + */ +void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) +{ + nf_unregister_hooks(ops, hweight32(table->valid_hooks)); + kfree(ops); +} +EXPORT_SYMBOL_GPL(xt_hook_unlink); + int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From e3eaa9910b380530cfd2c0670fcd3f627674da8a Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 17 Jun 2009 22:14:54 +0200 Subject: netfilter: xtables: generate initial table on-demand The static initial tables are pretty large, and after the net namespace has been instantiated, they just hang around for nothing. This commit removes them and creates tables on-demand at runtime when needed. Size shrinks by 7735 bytes (x86_64). Signed-off-by: Jan Engelhardt --- include/linux/netfilter_arp/arp_tables.h | 1 + include/linux/netfilter_ipv4/ip_tables.h | 1 + include/linux/netfilter_ipv6/ip6_tables.h | 1 + net/ipv4/netfilter/arp_tables.c | 7 +++++ net/ipv4/netfilter/arptable_filter.c | 40 ++++++--------------------- net/ipv4/netfilter/ip_tables.c | 7 +++++ net/ipv4/netfilter/iptable_filter.c | 46 ++++++++----------------------- net/ipv4/netfilter/iptable_mangle.c | 46 +++++-------------------------- net/ipv4/netfilter/iptable_raw.c | 36 +++++------------------- net/ipv4/netfilter/iptable_security.c | 39 +++++--------------------- net/ipv4/netfilter/nf_nat_rule.c | 39 +++++--------------------- net/ipv6/netfilter/ip6_tables.c | 7 +++++ net/ipv6/netfilter/ip6table_filter.c | 46 ++++++++----------------------- net/ipv6/netfilter/ip6table_mangle.c | 45 +++++------------------------- net/ipv6/netfilter/ip6table_raw.c | 36 +++++------------------- net/ipv6/netfilter/ip6table_security.c | 39 +++++--------------------- net/netfilter/x_tables.c | 4 ++- net/netfilter/xt_repldata.h | 35 +++++++++++++++++++++++ 18 files changed, 141 insertions(+), 334 deletions(-) create mode 100644 net/netfilter/xt_repldata.h (limited to 'include') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index f2336523a9df..0b33980611b2 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -258,6 +258,7 @@ struct arpt_error { .target.errorname = "ERROR", \ } +extern void *arpt_alloc_initial_table(const struct xt_table *); extern struct xt_table *arpt_register_table(struct net *net, const struct xt_table *table, const struct arpt_replace *repl); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 8d1f273d350b..364973b42133 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -282,6 +282,7 @@ struct ipt_error { .target.errorname = "ERROR", \ } +extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index d2952d2fa658..8031eb486a10 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -297,6 +297,7 @@ ip6t_get_target(struct ip6t_entry *e) #include extern void ip6t_init(void) __init; +extern void *ip6t_alloc_initial_table(const struct xt_table *); extern struct xt_table *ip6t_register_table(struct net *net, const struct xt_table *table, const struct ip6t_replace *repl); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 90203e1b9187..72723ea1054b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -27,6 +27,7 @@ #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller "); @@ -58,6 +59,12 @@ do { \ #define ARP_NF_ASSERT(x) #endif +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); + static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, const char *hdr_addr, int len) { diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index b361de0dac4c..bfe26f32b930 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -6,6 +6,7 @@ */ #include +#include #include MODULE_LICENSE("GPL"); @@ -15,36 +16,6 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static const struct -{ - struct arpt_replace repl; - struct arpt_standard entries[3]; - struct arpt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), - .hook_entry = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - .underflow = { - [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), - }, - }, - .entries = { - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */ - ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */ - }, - .term = ARPT_ERROR_INIT, -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -68,9 +39,14 @@ static struct nf_hook_ops *arpfilter_ops __read_mostly; static int __net_init arptable_filter_net_init(struct net *net) { - /* Register table */ + struct arpt_replace *repl; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; net->ipv4.arptable_filter = - arpt_register_table(net, &packet_filter, &initial_table.repl); + arpt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.arptable_filter)) return PTR_ERR(net->ipv4.arptable_filter); return 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5bf7de1527a5..2057b1bb6178 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -28,6 +28,7 @@ #include #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -66,6 +67,12 @@ do { \ #define inline #endif +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c14bb85db1d9..c8dc9800d620 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -23,36 +23,6 @@ MODULE_DESCRIPTION("iptables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -86,9 +56,18 @@ module_param(forward, bool, 0000); static int __net_init iptable_filter_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ipt_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv4.iptable_filter = - ipt_register_table(net, &packet_filter, &initial_table.repl); + ipt_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_filter)) return PTR_ERR(net->ipv4.iptable_filter); return 0; @@ -113,9 +92,6 @@ static int __init iptable_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&iptable_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 2355a229f8ee..58d7097baa3d 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -27,43 +27,6 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[5]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -134,9 +97,14 @@ static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init iptable_mangle_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_mangle = - ipt_register_table(net, &packet_mangler, &initial_table.repl); + ipt_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_mangle)) return PTR_ERR(net->ipv4.iptable_mangle); return 0; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 62a99154f14c..06fb9d11953c 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,33 +9,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[2]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -66,9 +39,14 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init iptable_raw_net_init(struct net *net) { - /* Register table */ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv4.iptable_raw = - ipt_register_table(net, &packet_raw, &initial_table.repl); + ipt_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_raw)) return PTR_ERR(net->ipv4.iptable_raw); return 0; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index b1bf3ca2c6c7..cce2f64e6f21 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -27,36 +27,6 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, @@ -87,9 +57,14 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init iptable_security_net_init(struct net *net) { - net->ipv4.iptable_security = - ipt_register_table(net, &security_table, &initial_table.repl); + struct ipt_replace *repl; + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.iptable_security)) return PTR_ERR(net->ipv4.iptable_security); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 85da34fdc755..ab74cc0535e2 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,36 +28,6 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} nat_initial_table __net_initdata = { - .repl = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 - }, - }, - .entries = { - IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IPT_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -186,8 +156,13 @@ static struct xt_target ipt_dnat_reg __read_mostly = { static int __net_init nf_nat_rule_net_init(struct net *net) { - net->ipv4.nat_table = ipt_register_table(net, &nat_table, - &nat_initial_table.repl); + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nat_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nat_table, repl); + kfree(repl); if (IS_ERR(net->ipv4.nat_table)) return PTR_ERR(net->ipv4.nat_table); return 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 4332f4591482..dcd7825fe7b6 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -29,6 +29,7 @@ #include #include #include +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); @@ -67,6 +68,12 @@ do { \ #define inline #endif +void *ip6t_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ip6t, IP6T); +} +EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 6e95d0614ca9..36b72cafc227 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -21,36 +21,6 @@ MODULE_DESCRIPTION("ip6tables filter table"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[3]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2 - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, @@ -78,9 +48,18 @@ module_param(forward, bool, 0000); static int __net_init ip6table_filter_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + /* Entry 1 is the FORWARD hook */ + ((struct ip6t_standard *)repl->entries)[1].target.verdict = + -forward - 1; + net->ipv6.ip6table_filter = - ip6t_register_table(net, &packet_filter, &initial_table.repl); + ip6t_register_table(net, &packet_filter, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_filter)) return PTR_ERR(net->ipv6.ip6table_filter); return 0; @@ -105,9 +84,6 @@ static int __init ip6table_filter_init(void) return -EINVAL; } - /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; - ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) return ret; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 5023ac52ffec..dc803b7e8e54 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,42 +21,6 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[5]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "mangle", - .valid_hooks = MANGLE_VALID_HOOKS, - .num_entries = 6, - .size = sizeof(struct ip6t_standard) * 5 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_IN] = sizeof(struct ip6t_standard), - [NF_INET_FORWARD] = sizeof(struct ip6t_standard) * 2, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 3, - [NF_INET_POST_ROUTING] = sizeof(struct ip6t_standard) * 4, - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, @@ -126,9 +90,14 @@ ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, static struct nf_hook_ops *mangle_ops __read_mostly; static int __net_init ip6table_mangle_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; net->ipv6.ip6table_mangle = - ip6t_register_table(net, &packet_mangler, &initial_table.repl); + ip6t_register_table(net, &packet_mangler, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_mangle)) return PTR_ERR(net->ipv6.ip6table_mangle); return 0; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 3bfa69511641..aef31a29de9e 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,33 +8,6 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[2]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) - }, - .underflow = { - [NF_INET_PRE_ROUTING] = 0, - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, @@ -58,9 +31,14 @@ static struct nf_hook_ops *rawtable_ops __read_mostly; static int __net_init ip6table_raw_net_init(struct net *net) { - /* Register table */ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; net->ipv6.ip6table_raw = - ip6t_register_table(net, &packet_raw, &initial_table.repl); + ip6t_register_table(net, &packet_raw, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_raw)) return PTR_ERR(net->ipv6.ip6table_raw); return 0; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index dd2200f17a6c..0824d865aa9b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -26,36 +26,6 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static const struct -{ - struct ip6t_replace repl; - struct ip6t_standard entries[3]; - struct ip6t_error term; -} initial_table __net_initdata = { - .repl = { - .name = "security", - .valid_hooks = SECURITY_VALID_HOOKS, - .num_entries = 4, - .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), - .hook_entry = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, - }, - .underflow = { - [NF_INET_LOCAL_IN] = 0, - [NF_INET_FORWARD] = sizeof(struct ip6t_standard), - [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, - }, - }, - .entries = { - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ - IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ - }, - .term = IP6T_ERROR_INIT, /* ERROR */ -}; - static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, @@ -79,9 +49,14 @@ static struct nf_hook_ops *sectbl_ops __read_mostly; static int __net_init ip6table_security_net_init(struct net *net) { - net->ipv6.ip6table_security = - ip6t_register_table(net, &security_table, &initial_table.repl); + struct ip6t_replace *repl; + repl = ip6t_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_security = + ip6t_register_table(net, &security_table, repl); + kfree(repl); if (IS_ERR(net->ipv6.ip6table_security)) return PTR_ERR(net->ipv6.ip6table_security); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b51cb0d7234a..dc2e05cb54c0 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -26,7 +26,9 @@ #include #include - +#include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h new file mode 100644 index 000000000000..6efe4e5a81c6 --- /dev/null +++ b/net/netfilter/xt_repldata.h @@ -0,0 +1,35 @@ +/* + * Today's hack: quantum tunneling in structs + * + * 'entries' and 'term' are never anywhere referenced by word in code. In fact, + * they serve as the hanging-off data accessed through repl.data[]. + */ + +#define xt_alloc_initial_table(type, typ2) ({ \ + unsigned int hook_mask = info->valid_hooks; \ + unsigned int nhooks = hweight32(hook_mask); \ + unsigned int bytes = 0, hooknum = 0, i = 0; \ + struct { \ + struct type##_replace repl; \ + struct type##_standard entries[nhooks]; \ + struct type##_error term; \ + } *tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); \ + if (tbl == NULL) \ + return NULL; \ + strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \ + tbl->term = (struct type##_error)typ2##_ERROR_INIT; \ + tbl->repl.valid_hooks = hook_mask; \ + tbl->repl.num_entries = nhooks + 1; \ + tbl->repl.size = nhooks * sizeof(struct type##_standard) + \ + sizeof(struct type##_error); \ + for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \ + if (!(hook_mask & 1)) \ + continue; \ + tbl->repl.hook_entry[hooknum] = bytes; \ + tbl->repl.underflow[hooknum] = bytes; \ + tbl->entries[i++] = (struct type##_standard) \ + typ2##_STANDARD_INIT(NF_ACCEPT); \ + bytes += sizeof(struct type##_standard); \ + } \ + tbl; \ +}) -- cgit v1.2.3 From b87921bdf25485afd8f5a5f25e86b5acef32a9cf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:22:48 +0100 Subject: netfilter: nf_conntrack: show helper and class in /proc/net/nf_conntrack_expect Make the output a bit more informative by showing the helper an expectation belongs to and the expectation class. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 1 + net/netfilter/nf_conntrack_expect.c | 9 +++++++++ net/netfilter/nf_conntrack_sip.c | 3 +++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 9a2b9cb52271..917e170fa752 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -66,6 +66,7 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; + const char *name; }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2f25ff610982..33b85f834c06 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -500,6 +500,7 @@ static void exp_seq_stop(struct seq_file *seq, void *v) static int exp_seq_show(struct seq_file *s, void *v) { struct nf_conntrack_expect *expect; + struct nf_conntrack_helper *helper; struct hlist_node *n = v; char *delim = ""; @@ -525,6 +526,14 @@ static int exp_seq_show(struct seq_file *s, void *v) if (expect->flags & NF_CT_EXPECT_INACTIVE) seq_printf(s, "%sINACTIVE", delim); + helper = rcu_dereference(nfct_help(expect->master)->helper); + if (helper) { + seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); + if (helper->expect_policy[expect->class].name) + seq_printf(s, "/%s", + helper->expect_policy[expect->class].name); + } + return seq_putc(s, '\n'); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 023966b569bf..419c5cabb332 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1305,14 +1305,17 @@ static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { + .name = "signalling", .max_expected = 1, .timeout = 3 * 60, }, [SIP_EXPECT_AUDIO] = { + .name = "audio", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, [SIP_EXPECT_VIDEO] = { + .name = "video", .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, -- cgit v1.2.3 From 3b6b9fab42fe98358d70735cf98d43fc18dc79c9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:23:53 +0100 Subject: netfilter: nf_conntrack_sip: pass data offset to NAT functions When using TCP multiple SIP messages might be present in a single packet. A following patch will parse them by setting the dptr to the beginning of each message. The NAT helper needs to reload the dptr value after mangling the packet however, so it needs to know the offset of the message to the beginning of the packet. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_sip.h | 14 ++-- net/ipv4/netfilter/nf_nat_sip.c | 101 +++++++++++++++-------------- net/netfilter/nf_conntrack_sip.c | 82 ++++++++++++----------- 3 files changed, 105 insertions(+), 92 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 23aa2ec6b7b7..2c6950b8bf7e 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -34,10 +34,10 @@ struct sdp_media_type { struct sip_handler { const char *method; unsigned int len; - int (*request)(struct sk_buff *skb, + int (*request)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq); - int (*response)(struct sk_buff *skb, + int (*response)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code); }; @@ -100,33 +100,39 @@ enum sdp_header_types { }; extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen); extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, unsigned int matchoff, unsigned int matchlen); extern unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, - const char **dptr, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr); extern unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, u_int16_t port); extern unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - const char **dptr, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr); extern unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 07d61a57613c..2454ea5abb79 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -29,7 +29,7 @@ MODULE_DESCRIPTION("SIP NAT helper"); MODULE_ALIAS("ip_nat_sip"); -static unsigned int mangle_packet(struct sk_buff *skb, +static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, const char *buffer, unsigned int buflen) @@ -42,12 +42,12 @@ static unsigned int mangle_packet(struct sk_buff *skb, return 0; /* Reload data pointer and adjust datalen value */ - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *dptr = skb->data + dataoff; *datalen += buflen - matchlen; return 1; } -static int map_addr(struct sk_buff *skb, +static int map_addr(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, union nf_inet_addr *addr, __be16 port) @@ -76,11 +76,11 @@ static int map_addr(struct sk_buff *skb, buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport)); - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static int map_sip_addr(struct sk_buff *skb, +static int map_sip_addr(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, enum sip_header_types type) { @@ -93,16 +93,17 @@ static int map_sip_addr(struct sk_buff *skb, if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL, &matchoff, &matchlen, &addr, &port) <= 0) return 1; - return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port); + return map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, + &addr, port); } -static unsigned int ip_nat_sip(struct sk_buff *skb, +static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned int dataoff, matchoff, matchlen; + unsigned int coff, matchoff, matchlen; union nf_inet_addr addr; __be16 port; int request, in_header; @@ -112,7 +113,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, if (ct_sip_parse_request(ct, *dptr, *datalen, &matchoff, &matchlen, &addr, &port) > 0 && - !map_addr(skb, dptr, datalen, matchoff, matchlen, + !map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; request = 1; @@ -138,7 +139,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, goto next; } - if (!map_addr(skb, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; @@ -153,8 +154,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.dst.u3.ip); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } @@ -167,8 +168,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { buflen = sprintf(buffer, "%pI4", &ct->tuplehash[!dir].tuple.src.u3.ip); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } @@ -181,27 +182,27 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) { __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port; buflen = sprintf(buffer, "%u", ntohs(p)); - if (!mangle_packet(skb, dptr, datalen, poff, plen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + poff, plen, buffer, buflen)) return NF_DROP; } } next: /* Translate Contact headers */ - dataoff = 0; + coff = 0; in_header = 0; - while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + while (ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, SIP_HDR_CONTACT, &in_header, &matchoff, &matchlen, &addr, &port) > 0) { - if (!map_addr(skb, dptr, datalen, matchoff, matchlen, + if (!map_addr(skb, dataoff, dptr, datalen, matchoff, matchlen, &addr, port)) return NF_DROP; } - if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) || - !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO)) + if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || + !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; return NF_ACCEPT; } @@ -232,7 +233,7 @@ static void ip_nat_sip_expected(struct nf_conn *ct, } } -static unsigned int ip_nat_sip_expect(struct sk_buff *skb, +static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, unsigned int matchoff, @@ -279,8 +280,8 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, if (exp->tuple.dst.u3.ip != exp->saved_ip || exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) { buflen = sprintf(buffer, "%pI4:%u", &newip, port); - if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, - buffer, buflen)) + if (!mangle_packet(skb, dataoff, dptr, datalen, + matchoff, matchlen, buffer, buflen)) goto err; } return NF_ACCEPT; @@ -290,7 +291,7 @@ err: return NF_DROP; } -static int mangle_content_len(struct sk_buff *skb, +static int mangle_content_len(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -312,12 +313,13 @@ static int mangle_content_len(struct sk_buff *skb, return 0; buflen = sprintf(buffer, "%u", c_len); - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen); } -static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, unsigned int *datalen, +static int mangle_sdp_packet(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, char *buffer, int buflen) @@ -326,16 +328,16 @@ static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); unsigned int matchlen, matchoff; - if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, + if (ct_sip_get_sdp_header(ct, *dptr, sdpoff, *datalen, type, term, &matchoff, &matchlen) <= 0) return -ENOENT; - return mangle_packet(skb, dptr, datalen, matchoff, matchlen, + return mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen) ? 0 : -EINVAL; } -static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, - unsigned int *datalen, +static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) @@ -344,16 +346,15 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, unsigned int buflen; buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, + if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, type, term, buffer, buflen)) return 0; - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_port(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, +static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, unsigned int matchoff, unsigned int matchlen, u_int16_t port) @@ -362,16 +363,16 @@ static unsigned int ip_nat_sdp_port(struct sk_buff *skb, unsigned int buflen; buflen = sprintf(buffer, "%u", port); - if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen, + if (!mangle_packet(skb, dataoff, dptr, datalen, matchoff, matchlen, buffer, buflen)) return 0; - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } -static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, - unsigned int dataoff, - unsigned int *datalen, +static unsigned int ip_nat_sdp_session(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr) { char buffer[sizeof("nnn.nnn.nnn.nnn")]; @@ -379,12 +380,12 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, /* Mangle session description owner and contact addresses */ buflen = sprintf(buffer, "%pI4", &addr->ip); - if (mangle_sdp_packet(skb, dptr, dataoff, datalen, + if (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, buffer, buflen)) return 0; - switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, + switch (mangle_sdp_packet(skb, dataoff, dptr, datalen, sdpoff, SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, buffer, buflen)) { case 0: @@ -401,14 +402,13 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, return 0; } - return mangle_content_len(skb, dptr, datalen); + return mangle_content_len(skb, dataoff, dptr, datalen); } /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp_media(struct sk_buff *skb, - const char **dptr, - unsigned int *datalen, +static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp, unsigned int mediaoff, @@ -456,7 +456,8 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, /* Update media port. */ if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port && - !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port)) + !ip_nat_sdp_port(skb, dataoff, dptr, datalen, + mediaoff, medialen, port)) goto err2; return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0ca2f2b5c2fa..0ec37d6a2df7 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -50,12 +50,13 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, + unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *exp, @@ -63,17 +64,17 @@ unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int matchlen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_expect_hook); -unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_addr_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, - unsigned int dataoff, unsigned int *datalen, + unsigned int sdpoff, enum sdp_header_types type, enum sdp_header_types term, const union nf_inet_addr *addr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_addr_hook); -unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int matchoff, @@ -82,14 +83,15 @@ unsigned int (*nf_nat_sdp_port_hook)(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_sdp_port_hook); unsigned int (*nf_nat_sdp_session_hook)(struct sk_buff *skb, - const char **dptr, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int sdpoff, const union nf_inet_addr *addr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sdp_session_hook); -unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_media_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, struct nf_conntrack_expect *rtp_exp, @@ -729,7 +731,7 @@ static void flush_expectations(struct nf_conn *ct, bool media) spin_unlock_bh(&nf_conntrack_lock); } -static int set_expected_rtp_rtcp(struct sk_buff *skb, +static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, union nf_inet_addr *daddr, __be16 port, enum sip_expectation_classes class, @@ -806,7 +808,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, if (direct_rtp) { nf_nat_sdp_port = rcu_dereference(nf_nat_sdp_port_hook); if (nf_nat_sdp_port && - !nf_nat_sdp_port(skb, dptr, datalen, + !nf_nat_sdp_port(skb, dataoff, dptr, datalen, mediaoff, medialen, ntohs(rtp_port))) goto err1; } @@ -828,7 +830,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, nf_nat_sdp_media = rcu_dereference(nf_nat_sdp_media_hook); if (nf_nat_sdp_media && ct->status & IPS_NAT_MASK && !direct_rtp) - ret = nf_nat_sdp_media(skb, dptr, datalen, rtp_exp, rtcp_exp, + ret = nf_nat_sdp_media(skb, dataoff, dptr, datalen, + rtp_exp, rtcp_exp, mediaoff, medialen, daddr); else { if (nf_ct_expect_related(rtp_exp) == 0) { @@ -867,7 +870,7 @@ static const struct sdp_media_type *sdp_media_type(const char *dptr, return NULL; } -static int process_sdp(struct sk_buff *skb, +static int process_sdp(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -942,7 +945,7 @@ static int process_sdp(struct sk_buff *skb, else return NF_DROP; - ret = set_expected_rtp_rtcp(skb, dptr, datalen, + ret = set_expected_rtp_rtcp(skb, dataoff, dptr, datalen, &rtp_addr, htons(port), t->class, mediaoff, medialen); if (ret != NF_ACCEPT) @@ -950,8 +953,9 @@ static int process_sdp(struct sk_buff *skb, /* Update media connection address if present */ if (maddr_len && nf_nat_sdp_addr && ct->status & IPS_NAT_MASK) { - ret = nf_nat_sdp_addr(skb, dptr, mediaoff, datalen, - c_hdr, SDP_HDR_MEDIA, &rtp_addr); + ret = nf_nat_sdp_addr(skb, dataoff, dptr, datalen, + mediaoff, c_hdr, SDP_HDR_MEDIA, + &rtp_addr); if (ret != NF_ACCEPT) return ret; } @@ -961,14 +965,15 @@ static int process_sdp(struct sk_buff *skb, /* Update session connection and owner addresses */ nf_nat_sdp_session = rcu_dereference(nf_nat_sdp_session_hook); if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr); + ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, + &rtp_addr); if (ret == NF_ACCEPT && i > 0) help->help.ct_sip_info.invite_cseq = cseq; return ret; } -static int process_invite_response(struct sk_buff *skb, +static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -978,13 +983,13 @@ static int process_invite_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_update_response(struct sk_buff *skb, +static int process_update_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -994,13 +999,13 @@ static int process_update_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_prack_response(struct sk_buff *skb, +static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1010,13 +1015,13 @@ static int process_prack_response(struct sk_buff *skb, if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) - return process_sdp(skb, dptr, datalen, cseq); + return process_sdp(skb, dataoff, dptr, datalen, cseq); else if (help->help.ct_sip_info.invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } -static int process_bye_request(struct sk_buff *skb, +static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1031,7 +1036,7 @@ static int process_bye_request(struct sk_buff *skb, * signalling connections. The expectation is marked inactive and is activated * when receiving a response indicating success from the registrar. */ -static int process_register_request(struct sk_buff *skb, +static int process_register_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) { @@ -1101,7 +1106,7 @@ static int process_register_request(struct sk_buff *skb, nf_nat_sip_expect = rcu_dereference(nf_nat_sip_expect_hook); if (nf_nat_sip_expect && ct->status & IPS_NAT_MASK) - ret = nf_nat_sip_expect(skb, dptr, datalen, exp, + ret = nf_nat_sip_expect(skb, dataoff, dptr, datalen, exp, matchoff, matchlen); else { if (nf_ct_expect_related(exp) != 0) @@ -1117,7 +1122,7 @@ store_cseq: return ret; } -static int process_register_response(struct sk_buff *skb, +static int process_register_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq, unsigned int code) { @@ -1127,7 +1132,7 @@ static int process_register_response(struct sk_buff *skb, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; - unsigned int matchoff, matchlen, dataoff = 0; + unsigned int matchoff, matchlen, coff = 0; unsigned int expires = 0; int in_contact = 0, ret; @@ -1154,7 +1159,7 @@ static int process_register_response(struct sk_buff *skb, while (1) { unsigned int c_expires = expires; - ret = ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen, + ret = ct_sip_parse_header_uri(ct, *dptr, &coff, *datalen, SIP_HDR_CONTACT, &in_contact, &matchoff, &matchlen, &addr, &port); @@ -1193,13 +1198,13 @@ static const struct sip_handler sip_handlers[] = { SIP_HANDLER("REGISTER", process_register_request, process_register_response), }; -static int process_sip_response(struct sk_buff *skb, +static int process_sip_response(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - unsigned int matchoff, matchlen; - unsigned int code, cseq, dataoff, i; + unsigned int matchoff, matchlen, matchend; + unsigned int code, cseq, i; if (*datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; @@ -1213,7 +1218,7 @@ static int process_sip_response(struct sk_buff *skb, cseq = simple_strtoul(*dptr + matchoff, NULL, 10); if (!cseq) return NF_DROP; - dataoff = matchoff + matchlen + 1; + matchend = matchoff + matchlen + 1; for (i = 0; i < ARRAY_SIZE(sip_handlers); i++) { const struct sip_handler *handler; @@ -1221,15 +1226,16 @@ static int process_sip_response(struct sk_buff *skb, handler = &sip_handlers[i]; if (handler->response == NULL) continue; - if (*datalen < dataoff + handler->len || - strnicmp(*dptr + dataoff, handler->method, handler->len)) + if (*datalen < matchend + handler->len || + strnicmp(*dptr + matchend, handler->method, handler->len)) continue; - return handler->response(skb, dptr, datalen, cseq, code); + return handler->response(skb, dataoff, dptr, datalen, + cseq, code); } return NF_ACCEPT; } -static int process_sip_request(struct sk_buff *skb, +static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen) { enum ip_conntrack_info ctinfo; @@ -1254,7 +1260,7 @@ static int process_sip_request(struct sk_buff *skb, if (!cseq) return NF_DROP; - return handler->request(skb, dptr, datalen, cseq); + return handler->request(skb, dataoff, dptr, datalen, cseq); } return NF_ACCEPT; } @@ -1288,13 +1294,13 @@ static int sip_help(struct sk_buff *skb, return NF_ACCEPT; if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, &dptr, &datalen); + ret = process_sip_request(skb, dataoff, &dptr, &datalen); else - ret = process_sip_response(skb, &dptr, &datalen); + ret = process_sip_response(skb, dataoff, &dptr, &datalen); if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, &dptr, &datalen)) + if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen)) ret = NF_DROP; } -- cgit v1.2.3 From f5b321bd37fbec9188feb1f721ab46a5ac0b35da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:26:19 +0100 Subject: netfilter: nf_conntrack_sip: add TCP support Add TCP support, which is mandated by RFC3261 for all SIP elements. SIP over TCP is similar to UDP, except that messages are delimited by Content-Length: headers and multiple messages may appear in one packet. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_sip.h | 3 +- net/ipv4/netfilter/nf_nat_sip.c | 2 +- net/netfilter/nf_conntrack_sip.c | 205 +++++++++++++++++++++++++---- 3 files changed, 179 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 2c6950b8bf7e..fa9bb8981450 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -84,7 +84,8 @@ enum sip_header_types { SIP_HDR_FROM, SIP_HDR_TO, SIP_HDR_CONTACT, - SIP_HDR_VIA, + SIP_HDR_VIA_UDP, + SIP_HDR_VIA_TCP, SIP_HDR_EXPIRES, SIP_HDR_CONTENT_LENGTH, }; diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 2454ea5abb79..b232e4040dc6 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -122,7 +122,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, /* Translate topmost Via header and parameters */ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, - SIP_HDR_VIA, NULL, &matchoff, &matchlen, + SIP_HDR_VIA_UDP, NULL, &matchoff, &matchlen, &addr, &port) > 0) { unsigned int matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 0ec37d6a2df7..1cc75c5a822b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -287,7 +288,8 @@ static const struct sip_header ct_sip_hdrs[] = { [SIP_HDR_FROM] = SIP_HDR("From", "f", "sip:", skp_epaddr_len), [SIP_HDR_TO] = SIP_HDR("To", "t", "sip:", skp_epaddr_len), [SIP_HDR_CONTACT] = SIP_HDR("Contact", "m", "sip:", skp_epaddr_len), - [SIP_HDR_VIA] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_VIA_UDP] = SIP_HDR("Via", "v", "UDP ", epaddr_len), + [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len), [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), }; @@ -519,6 +521,33 @@ int ct_sip_parse_header_uri(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_header_uri); +static int ct_sip_parse_param(const struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + const char *name, + unsigned int *matchoff, unsigned int *matchlen) +{ + const char *limit = dptr + datalen; + const char *start; + const char *end; + + limit = ct_sip_header_search(dptr + dataoff, limit, ",", strlen(",")); + if (!limit) + limit = dptr + datalen; + + start = ct_sip_header_search(dptr + dataoff, limit, name, strlen(name)); + if (!start) + return 0; + start += strlen(name); + + end = ct_sip_header_search(start, limit, ";", strlen(";")); + if (!end) + end = limit; + + *matchoff = start - dptr; + *matchlen = end - start; + return 1; +} + /* Parse address from header parameter and return address, offset and length */ int ct_sip_parse_address_param(const struct nf_conn *ct, const char *dptr, unsigned int dataoff, unsigned int datalen, @@ -577,6 +606,29 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr, } EXPORT_SYMBOL_GPL(ct_sip_parse_numerical_param); +static int ct_sip_parse_transport(struct nf_conn *ct, const char *dptr, + unsigned int dataoff, unsigned int datalen, + u8 *proto) +{ + unsigned int matchoff, matchlen; + + if (ct_sip_parse_param(ct, dptr, dataoff, datalen, "transport=", + &matchoff, &matchlen)) { + if (!strnicmp(dptr + matchoff, "TCP", strlen("TCP"))) + *proto = IPPROTO_TCP; + else if (!strnicmp(dptr + matchoff, "UDP", strlen("UDP"))) + *proto = IPPROTO_UDP; + else + return 0; + + if (*proto != nf_ct_protonum(ct)) + return 0; + } else + *proto = nf_ct_protonum(ct); + + return 1; +} + /* SDP header parsing: a SDP session description contains an ordered set of * headers, starting with a section containing general session parameters, * optionally followed by multiple media descriptions. @@ -685,7 +737,7 @@ static int ct_sip_parse_sdp_addr(const struct nf_conn *ct, const char *dptr, static int refresh_signalling_expectation(struct nf_conn *ct, union nf_inet_addr *addr, - __be16 port, + u8 proto, __be16 port, unsigned int expires) { struct nf_conn_help *help = nfct_help(ct); @@ -697,6 +749,7 @@ static int refresh_signalling_expectation(struct nf_conn *ct, hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) { if (exp->class != SIP_EXPECT_SIGNALLING || !nf_inet_addr_cmp(&exp->tuple.dst.u3, addr) || + exp->tuple.dst.protonum != proto || exp->tuple.dst.u.udp.port != port) continue; if (!del_timer(&exp->timeout)) @@ -1048,6 +1101,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_expect *exp; union nf_inet_addr *saddr, daddr; __be16 port; + u8 proto; unsigned int expires = 0; int ret; typeof(nf_nat_sip_expect_hook) nf_nat_sip_expect; @@ -1080,6 +1134,10 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, &daddr)) return NF_ACCEPT; + if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, *datalen, + &proto) == 0) + return NF_ACCEPT; + if (ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, "expires=", NULL, NULL, &expires) < 0) @@ -1099,7 +1157,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, saddr = &ct->tuplehash[!dir].tuple.src.u3; nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct), - saddr, &daddr, IPPROTO_UDP, NULL, &port); + saddr, &daddr, proto, NULL, &port); exp->timeout.expires = sip_timeout * HZ; exp->helper = nfct_help(ct)->helper; exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE; @@ -1132,6 +1190,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; + u8 proto; unsigned int matchoff, matchlen, coff = 0; unsigned int expires = 0; int in_contact = 0, ret; @@ -1172,6 +1231,10 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, &addr)) continue; + if (ct_sip_parse_transport(ct, *dptr, matchoff + matchlen, + *datalen, &proto) == 0) + continue; + ret = ct_sip_parse_numerical_param(ct, *dptr, matchoff + matchlen, *datalen, "expires=", @@ -1180,7 +1243,8 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, return NF_DROP; if (c_expires == 0) break; - if (refresh_signalling_expectation(ct, &addr, port, c_expires)) + if (refresh_signalling_expectation(ct, &addr, proto, port, + c_expires)) return NF_ACCEPT; } @@ -1265,50 +1329,123 @@ static int process_sip_request(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; } -static int sip_help(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo) +static int process_sip_msg(struct sk_buff *skb, struct nf_conn *ct, + unsigned int dataoff, const char **dptr, + unsigned int *datalen) +{ + typeof(nf_nat_sip_hook) nf_nat_sip; + int ret; + + if (strnicmp(*dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) + ret = process_sip_request(skb, dataoff, dptr, datalen); + else + ret = process_sip_response(skb, dataoff, dptr, datalen); + + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + nf_nat_sip = rcu_dereference(nf_nat_sip_hook); + if (nf_nat_sip && !nf_nat_sip(skb, dataoff, dptr, datalen)) + ret = NF_DROP; + } + + return ret; +} + +static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) { + struct tcphdr *th, _tcph; unsigned int dataoff, datalen; - const char *dptr; + unsigned int matchoff, matchlen, clen; + unsigned int msglen, origlen; + const char *dptr, *end; + s16 diff, tdiff = 0; int ret; - typeof(nf_nat_sip_hook) nf_nat_sip; + + if (ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) + return NF_ACCEPT; /* No Data ? */ - dataoff = protoff + sizeof(struct udphdr); + th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + dataoff = protoff + th->doff * 4; if (dataoff >= skb->len) return NF_ACCEPT; nf_ct_refresh(ct, skb, sip_timeout * HZ); - if (!skb_is_nonlinear(skb)) - dptr = skb->data + dataoff; - else { + if (skb_is_nonlinear(skb)) { pr_debug("Copy of skbuff not supported yet.\n"); return NF_ACCEPT; } + dptr = skb->data + dataoff; datalen = skb->len - dataoff; if (datalen < strlen("SIP/2.0 200")) return NF_ACCEPT; - if (strnicmp(dptr, "SIP/2.0 ", strlen("SIP/2.0 ")) != 0) - ret = process_sip_request(skb, dataoff, &dptr, &datalen); - else - ret = process_sip_response(skb, dataoff, &dptr, &datalen); + while (1) { + if (ct_sip_get_header(ct, dptr, 0, datalen, + SIP_HDR_CONTENT_LENGTH, + &matchoff, &matchlen) <= 0) + break; - if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { - nf_nat_sip = rcu_dereference(nf_nat_sip_hook); - if (nf_nat_sip && !nf_nat_sip(skb, dataoff, &dptr, &datalen)) - ret = NF_DROP; + clen = simple_strtoul(dptr + matchoff, (char **)&end, 10); + if (dptr + matchoff == end) + break; + + if (end + strlen("\r\n\r\n") > dptr + datalen) + break; + if (end[0] != '\r' || end[1] != '\n' || + end[2] != '\r' || end[3] != '\n') + break; + end += strlen("\r\n\r\n") + clen; + + msglen = origlen = end - dptr; + + ret = process_sip_msg(skb, ct, dataoff, &dptr, &msglen); + if (ret != NF_ACCEPT) + break; + diff = msglen - origlen; + tdiff += diff; + + dataoff += msglen; + dptr += msglen; + datalen = datalen + diff - msglen; } return ret; } -static struct nf_conntrack_helper sip[MAX_PORTS][2] __read_mostly; -static char sip_names[MAX_PORTS][2][sizeof("sip-65535")] __read_mostly; +static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + const char *dptr; + + /* No Data ? */ + dataoff = protoff + sizeof(struct udphdr); + if (dataoff >= skb->len) + return NF_ACCEPT; + + nf_ct_refresh(ct, skb, sip_timeout * HZ); + + if (skb_is_nonlinear(skb)) { + pr_debug("Copy of skbuff not supported yet.\n"); + return NF_ACCEPT; + } + + dptr = skb->data + dataoff; + datalen = skb->len - dataoff; + if (datalen < strlen("SIP/2.0 200")) + return NF_ACCEPT; + + return process_sip_msg(skb, ct, dataoff, &dptr, &datalen); +} + +static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; +static char sip_names[MAX_PORTS][4][sizeof("sip-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1333,7 +1470,7 @@ static void nf_conntrack_sip_fini(void) int i, j; for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { + for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { if (sip[i][j].me == NULL) continue; nf_conntrack_helper_unregister(&sip[i][j]); @@ -1353,14 +1490,24 @@ static int __init nf_conntrack_sip_init(void) memset(&sip[i], 0, sizeof(sip[i])); sip[i][0].tuple.src.l3num = AF_INET; - sip[i][1].tuple.src.l3num = AF_INET6; - for (j = 0; j < 2; j++) { - sip[i][j].tuple.dst.protonum = IPPROTO_UDP; + sip[i][0].tuple.dst.protonum = IPPROTO_UDP; + sip[i][0].help = sip_help_udp; + sip[i][1].tuple.src.l3num = AF_INET; + sip[i][1].tuple.dst.protonum = IPPROTO_TCP; + sip[i][1].help = sip_help_tcp; + + sip[i][2].tuple.src.l3num = AF_INET6; + sip[i][2].tuple.dst.protonum = IPPROTO_UDP; + sip[i][2].help = sip_help_udp; + sip[i][3].tuple.src.l3num = AF_INET6; + sip[i][3].tuple.dst.protonum = IPPROTO_TCP; + sip[i][3].help = sip_help_tcp; + + for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { sip[i][j].tuple.src.u.udp.port = htons(ports[i]); sip[i][j].expect_policy = sip_exp_policy; sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; - sip[i][j].help = sip_help; tmpname = &sip_names[i][j][0]; if (ports[i] == SIP_PORT) -- cgit v1.2.3 From 010c0b9f34a4c567b431f8b49a58b7332ed42e47 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:27:09 +0100 Subject: netfilter: nf_nat: support mangling a single TCP packet multiple times nf_nat_mangle_tcp_packet() can currently only handle a single mangling per window because it only maintains two sequence adjustment positions: the one before the last adjustment and the one after. This patch makes sequence number adjustment tracking in nf_nat_mangle_tcp_packet() optional and allows a helper to manually update the offsets after the packet has been fully handled. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_helper.h | 32 +++++++++++++++++++++------- net/ipv4/netfilter/nf_nat_helper.c | 39 +++++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 4222220920a5..02bb6c29dc3d 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -7,13 +7,27 @@ struct sk_buff; /* These return true or false. */ -extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len); +extern int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust); + +static inline int nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len) +{ + return __nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + match_offset, match_len, + rep_buffer, rep_len, true); +} + extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -21,6 +35,10 @@ extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); + +extern void nf_nat_set_seq_adjust(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + __be32 seq, s16 off); extern int nf_nat_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 7f10a6be0191..4b6af4bb1f50 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -141,6 +141,17 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) return 1; } +void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + __be32 seq, s16 off) +{ + if (!off) + return; + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(seq), off, ct, ctinfo); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); +} +EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); + /* Generic function for mangling variable-length address changes inside * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX * command in FTP). @@ -149,14 +160,13 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * skb enlargement, ... * * */ -int -nf_nat_mangle_tcp_packet(struct sk_buff *skb, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) +int __nf_nat_mangle_tcp_packet(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int match_offset, + unsigned int match_len, + const char *rep_buffer, + unsigned int rep_len, bool adjust) { struct rtable *rt = skb_rtable(skb); struct iphdr *iph; @@ -202,16 +212,13 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, inet_proto_csum_replace2(&tcph->check, skb, htons(oldlen), htons(datalen), 1); - if (rep_len != match_len) { - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); - nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); - } + if (adjust && rep_len != match_len) + nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq, + (int)rep_len - (int)match_len); + return 1; } -EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); +EXPORT_SYMBOL(__nf_nat_mangle_tcp_packet); /* Generic function for mangling variable-length address changes inside * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX -- cgit v1.2.3 From 48f8ac26537c1b7b1a2422f5232f45d06c945348 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:29:38 +0100 Subject: netfilter: nf_nat_sip: add TCP support Add support for mangling TCP SIP packets. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_sip.h | 1 + net/ipv4/netfilter/nf_nat_sip.c | 53 ++++++++++++++++++++++++++---- net/netfilter/nf_conntrack_sip.c | 10 ++++++ 3 files changed, 58 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index fa9bb8981450..cd84d6f44d11 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -104,6 +104,7 @@ extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen); +extern void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off); extern unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index b232e4040dc6..11b538deaaec 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -1,4 +1,4 @@ -/* SIP extension for UDP NAT alteration. +/* SIP extension for NAT alteration. * * (C) 2005 by Christian Hentschel * based on RR's ip_nat_ftp.c and other modules. @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -36,10 +37,27 @@ static unsigned int mangle_packet(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen, - buffer, buflen)) - return 0; + struct tcphdr *th; + unsigned int baseoff; + + if (nf_ct_protonum(ct) == IPPROTO_TCP) { + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + baseoff = ip_hdrlen(skb) + th->doff * 4; + matchoff += dataoff - baseoff; + + if (!__nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + matchoff, matchlen, + buffer, buflen, false)) + return 0; + } else { + baseoff = ip_hdrlen(skb) + sizeof(struct udphdr); + matchoff += dataoff - baseoff; + + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + matchoff, matchlen, + buffer, buflen)) + return 0; + } /* Reload data pointer and adjust datalen value */ *dptr = skb->data + dataoff; @@ -104,6 +122,7 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int coff, matchoff, matchlen; + enum sip_header_types hdr; union nf_inet_addr addr; __be16 port; int request, in_header; @@ -120,9 +139,14 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, unsigned int dataoff, } else request = 0; + if (nf_ct_protonum(ct) == IPPROTO_TCP) + hdr = SIP_HDR_VIA_TCP; + else + hdr = SIP_HDR_VIA_UDP; + /* Translate topmost Via header and parameters */ if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, - SIP_HDR_VIA_UDP, NULL, &matchoff, &matchlen, + hdr, NULL, &matchoff, &matchlen, &addr, &port) > 0) { unsigned int matchend, poff, plen, buflen, n; char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")]; @@ -204,9 +228,23 @@ next: if (!map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_FROM) || !map_sip_addr(skb, dataoff, dptr, datalen, SIP_HDR_TO)) return NF_DROP; + return NF_ACCEPT; } +static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP || off == 0) + return; + + th = (struct tcphdr *)(skb->data + ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} + /* Handles expected signalling connections and media streams */ static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) @@ -472,6 +510,7 @@ err1: static void __exit nf_nat_sip_fini(void) { rcu_assign_pointer(nf_nat_sip_hook, NULL); + rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, NULL); rcu_assign_pointer(nf_nat_sip_expect_hook, NULL); rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL); rcu_assign_pointer(nf_nat_sdp_port_hook, NULL); @@ -483,12 +522,14 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sip_seq_adjust_hook != NULL); BUG_ON(nf_nat_sip_expect_hook != NULL); BUG_ON(nf_nat_sdp_addr_hook != NULL); BUG_ON(nf_nat_sdp_port_hook != NULL); BUG_ON(nf_nat_sdp_session_hook != NULL); BUG_ON(nf_nat_sdp_media_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); + rcu_assign_pointer(nf_nat_sip_seq_adjust_hook, ip_nat_sip_seq_adjust); rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect); rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr); rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port); diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1cc75c5a822b..3bb3aaff76e9 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -56,6 +56,9 @@ unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, unsigned int dataoff, unsigned int *datalen) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); +void (*nf_nat_sip_seq_adjust_hook)(struct sk_buff *skb, s16 off) __read_mostly; +EXPORT_SYMBOL_GPL(nf_nat_sip_seq_adjust_hook); + unsigned int (*nf_nat_sip_expect_hook)(struct sk_buff *skb, unsigned int dataoff, const char **dptr, @@ -1360,6 +1363,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, const char *dptr, *end; s16 diff, tdiff = 0; int ret; + typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) @@ -1415,6 +1419,12 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, datalen = datalen + diff - msglen; } + if (ret == NF_ACCEPT && ct->status & IPS_NAT_MASK) { + nf_nat_sip_seq_adjust = rcu_dereference(nf_nat_sip_seq_adjust_hook); + if (nf_nat_sip_seq_adjust) + nf_nat_sip_seq_adjust(skb, tdiff); + } + return ret; } -- cgit v1.2.3 From 9d288dffe3a276e1f06ba556845c456d696c5a4f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 11 Feb 2010 12:30:21 +0100 Subject: netfilter: nf_conntrack_sip: add T.38 FAX support Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_sip.h | 1 + include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_sip.c | 28 +++++++++++++++++++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index cd84d6f44d11..ff8cfbcf3b81 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -14,6 +14,7 @@ enum sip_expectation_classes { SIP_EXPECT_SIGNALLING, SIP_EXPECT_AUDIO, SIP_EXPECT_VIDEO, + SIP_EXPECT_IMAGE, __SIP_EXPECT_MAX }; #define SIP_EXPECT_MAX (__SIP_EXPECT_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5043d61c99a7..5b7d8835523f 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -70,7 +70,7 @@ union nf_conntrack_help { struct nf_conntrack_helper; /* Must be kept in sync with the classes defined by helpers */ -#define NF_CT_MAX_EXPECT_CLASSES 3 +#define NF_CT_MAX_EXPECT_CLASSES 4 /* nf_conn feature for connections that have a helper */ struct nf_conn_help { diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3bb3aaff76e9..fbe8ff5a420a 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -907,6 +907,7 @@ err1: static const struct sdp_media_type sdp_media_types[] = { SDP_MEDIA_TYPE("audio ", SIP_EXPECT_AUDIO), SDP_MEDIA_TYPE("video ", SIP_EXPECT_VIDEO), + SDP_MEDIA_TYPE("image ", SIP_EXPECT_IMAGE), }; static const struct sdp_media_type *sdp_media_type(const char *dptr, @@ -932,7 +933,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); unsigned int matchoff, matchlen; unsigned int mediaoff, medialen; unsigned int sdpoff; @@ -1024,9 +1024,6 @@ static int process_sdp(struct sk_buff *skb, unsigned int dataoff, ret = nf_nat_sdp_session(skb, dataoff, dptr, datalen, sdpoff, &rtp_addr); - if (ret == NF_ACCEPT && i > 0) - help->help.ct_sip_info.invite_cseq = cseq; - return ret; } static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, @@ -1077,6 +1074,22 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; } +static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, + const char **dptr, unsigned int *datalen, + unsigned int cseq) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_help *help = nfct_help(ct); + unsigned int ret; + + flush_expectations(ct, true); + ret = process_sdp(skb, dataoff, dptr, datalen, cseq); + if (ret == NF_ACCEPT) + help->help.ct_sip_info.invite_cseq = cseq; + return ret; +} + static int process_bye_request(struct sk_buff *skb, unsigned int dataoff, const char **dptr, unsigned int *datalen, unsigned int cseq) @@ -1257,7 +1270,7 @@ flush: } static const struct sip_handler sip_handlers[] = { - SIP_HANDLER("INVITE", process_sdp, process_invite_response), + SIP_HANDLER("INVITE", process_invite_request, process_invite_response), SIP_HANDLER("UPDATE", process_sdp, process_update_response), SIP_HANDLER("ACK", process_sdp, NULL), SIP_HANDLER("PRACK", process_sdp, process_prack_response), @@ -1473,6 +1486,11 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 .max_expected = 2 * IP_CT_DIR_MAX, .timeout = 3 * 60, }, + [SIP_EXPECT_IMAGE] = { + .name = "image", + .max_expected = IP_CT_DIR_MAX, + .timeout = 3 * 60, + }, }; static void nf_conntrack_sip_fini(void) -- cgit v1.2.3 From 857b409a48bdc33e824dff2d730e271b964e78bd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 12 Feb 2010 06:24:46 +0100 Subject: netfilter: nf_conntrack: elegantly simplify nf_ct_exp_net() Remove #ifdef at nf_ct_exp_net() by using nf_ct_net(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 917e170fa752..4b47ec19ef39 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -56,11 +56,7 @@ struct nf_conntrack_expect { static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) { -#ifdef CONFIG_NET_NS - return exp->master->ct_net; /* by definition */ -#else - return &init_net; -#endif + return nf_ct_net(exp->master); } struct nf_conntrack_expect_policy { -- cgit v1.2.3 From 23f3733d440b918ccb7746718f77256334cf6176 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 5 Jun 2009 17:31:46 +0200 Subject: netfilter: reduce NF_HOOK by one argument No changes in vmlinux filesize. Signed-off-by: Jan Engelhardt --- include/linux/netfilter.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 78f33d223680..2f22816a5514 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -163,11 +163,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh, - int cond) + int (*okfn)(struct sk_buff *), int thresh) { - if (!cond) - return 1; #ifndef CONFIG_NETFILTER_DEBUG if (list_empty(&nf_hooks[pf][hook])) return 1; @@ -179,7 +176,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1); + return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -206,13 +203,13 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh)) == 1)\ __ret = (okfn)(skb); \ __ret;}) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ +if ((cond) || (__ret = nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -328,8 +325,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh, - int cond) + int (*okfn)(struct sk_buff *), int thresh) { return okfn(skb); } -- cgit v1.2.3 From 2249065f4b22b493bae2caf549b86f175f33188e Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 13 Jun 2009 04:13:26 +0200 Subject: netfilter: get rid of the grossness in netfilter.h GCC is now smart enough to follow the inline trail correctly. vmlinux size remain the same. Signed-off-by: Jan Engelhardt --- include/linux/netfilter.h | 45 ++++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2f22816a5514..70079454ffd0 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -196,25 +196,36 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb, coders :) */ -/* This is gross, but inline doesn't cut it for avoiding the function - call in fast path: gcc doesn't inline (needs value tracking?). --RR */ - -/* HX: It's slightly less gross now. */ - -#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ -({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh)) == 1)\ - __ret = (okfn)(skb); \ -__ret;}) +static inline int +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sk_buff *skb, + struct net_device *in, struct net_device *out, + int (*okfn)(struct sk_buff *), int thresh) +{ + int ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, thresh); + if (ret == 1) + ret = okfn(skb); + return ret; +} -#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ -({int __ret; \ -if ((cond) || (__ret = nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN)) == 1)\ - __ret = (okfn)(skb); \ -__ret;}) +static inline int +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sk_buff *skb, + struct net_device *in, struct net_device *out, + int (*okfn)(struct sk_buff *), bool cond) +{ + int ret = 1; + if (cond || + (ret = nf_hook_thresh(pf, hook, skb, in, out, okfn, INT_MIN) == 1)) + ret = okfn(skb); + return ret; +} -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ - NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN) +static inline int +NF_HOOK(uint8_t pf, unsigned int hook, struct sk_buff *skb, + struct net_device *in, struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return NF_HOOK_THRESH(pf, hook, skb, in, out, okfn, INT_MIN); +} /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, -- cgit v1.2.3 From 739674fb7febf116e7d647031fab16989a08a965 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 26 Jun 2009 08:23:19 +0200 Subject: netfilter: xtables: constify args in compat copying functions Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 12 ++++++------ net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- net/netfilter/x_tables.c | 8 ++++---- net/netfilter/xt_hashlimit.c | 4 ++-- net/netfilter/xt_limit.c | 4 ++-- 8 files changed, 22 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index a0a144a6c6d9..c61758f4be31 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -286,8 +286,8 @@ struct xt_match { void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ - void (*compat_from_user)(void *dst, void *src); - int (*compat_to_user)(void __user *dst, void *src); + void (*compat_from_user)(void *dst, const void *src); + int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -328,8 +328,8 @@ struct xt_target { void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ - void (*compat_from_user)(void *dst, void *src); - int (*compat_to_user)(void __user *dst, void *src); + void (*compat_from_user)(void *dst, const void *src); + int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -593,13 +593,13 @@ extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size); -extern int xt_compat_match_to_user(struct xt_entry_match *m, +extern int xt_compat_match_to_user(const struct xt_entry_match *m, void __user **dstptr, unsigned int *size); extern int xt_compat_target_offset(const struct xt_target *target); extern void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, unsigned int *size); -extern int xt_compat_target_to_user(struct xt_entry_target *t, +extern int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); #endif /* CONFIG_COMPAT */ diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 72723ea1054b..2303dc92a277 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -842,7 +842,7 @@ static int copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -851,7 +851,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2057b1bb6178..2a4f745ce36e 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1047,7 +1047,7 @@ copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -1056,7 +1056,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 399061c3fd7d..09a5d3f7cc41 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -338,7 +338,7 @@ struct compat_ipt_ulog_info { char prefix[ULOG_PREFIX_LEN]; }; -static void ulog_tg_compat_from_user(void *dst, void *src) +static void ulog_tg_compat_from_user(void *dst, const void *src) { const struct compat_ipt_ulog_info *cl = src; struct ipt_ulog_info l = { @@ -351,7 +351,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src) memcpy(dst, &l, sizeof(l)); } -static int ulog_tg_compat_to_user(void __user *dst, void *src) +static int ulog_tg_compat_to_user(void __user *dst, const void *src) { const struct ipt_ulog_info *l = src; struct compat_ipt_ulog_info cl = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dcd7825fe7b6..3ff4fd50e96e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1079,7 +1079,7 @@ copy_entries_to_user(unsigned int total_size, } #ifdef CONFIG_COMPAT -static void compat_standard_from_user(void *dst, void *src) +static void compat_standard_from_user(void *dst, const void *src) { int v = *(compat_int_t *)src; @@ -1088,7 +1088,7 @@ static void compat_standard_from_user(void *dst, void *src) memcpy(dst, &v, sizeof(v)); } -static int compat_standard_to_user(void __user *dst, void *src) +static int compat_standard_to_user(void __user *dst, const void *src) { compat_int_t cv = *(int *)src; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 12503199826f..69c56287d518 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -485,8 +485,8 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_match_from_user); -int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, - unsigned int *size) +int xt_compat_match_to_user(const struct xt_entry_match *m, + void __user **dstptr, unsigned int *size) { const struct xt_match *match = m->u.kernel.match; struct compat_xt_entry_match __user *cm = *dstptr; @@ -588,8 +588,8 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, } EXPORT_SYMBOL_GPL(xt_compat_target_from_user); -int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, - unsigned int *size) +int xt_compat_target_to_user(const struct xt_entry_target *t, + void __user **dstptr, unsigned int *size) { const struct xt_target *target = t->u.kernel.target; struct compat_xt_entry_target __user *ct = *dstptr; diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 017c95966aa8..e47fb805ffb4 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -775,7 +775,7 @@ struct compat_xt_hashlimit_info { compat_uptr_t master; }; -static void hashlimit_mt_compat_from_user(void *dst, void *src) +static void hashlimit_mt_compat_from_user(void *dst, const void *src) { int off = offsetof(struct compat_xt_hashlimit_info, hinfo); @@ -783,7 +783,7 @@ static void hashlimit_mt_compat_from_user(void *dst, void *src) memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off); } -static int hashlimit_mt_compat_to_user(void __user *dst, void *src) +static int hashlimit_mt_compat_to_user(void __user *dst, const void *src) { int off = offsetof(struct compat_xt_hashlimit_info, hinfo); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 2773be6a71dd..a0ca5339af41 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -148,7 +148,7 @@ struct compat_xt_rateinfo { /* To keep the full "prev" timestamp, the upper 32 bits are stored in the * master pointer, which does not need to be preserved. */ -static void limit_mt_compat_from_user(void *dst, void *src) +static void limit_mt_compat_from_user(void *dst, const void *src) { const struct compat_xt_rateinfo *cm = src; struct xt_rateinfo m = { @@ -162,7 +162,7 @@ static void limit_mt_compat_from_user(void *dst, void *src) memcpy(dst, &m, sizeof(m)); } -static int limit_mt_compat_to_user(void __user *dst, void *src) +static int limit_mt_compat_to_user(void __user *dst, const void *src) { const struct xt_rateinfo *m = src; struct compat_xt_rateinfo cm = { -- cgit v1.2.3 From 8fea97ec1772bbf553d89187340ef624d548e115 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 17:45:08 +0100 Subject: netfilter: nf_conntrack: pass template to l4proto ->error() handler The error handlers might need the template to get the conntrack zone introduced in the next patches to perform a conntrack lookup. Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 3 ++- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 3 ++- net/netfilter/nf_conntrack_core.c | 3 ++- net/netfilter/nf_conntrack_proto_dccp.c | 5 +++-- net/netfilter/nf_conntrack_proto_tcp.c | 2 +- net/netfilter/nf_conntrack_proto_udp.c | 4 ++-- net/netfilter/nf_conntrack_proto_udplite.c | 2 +- 8 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index ca6dcf3445ab..e3d3ee3c06a2 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -49,8 +49,8 @@ struct nf_conntrack_l4proto { /* Called when a conntrack entry is destroyed */ void (*destroy)(struct nf_conn *ct); - int (*error)(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, + int (*error)(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum); /* Print out the per-protocol part of the tuple. Return like seq_* */ diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 7afd39b5b781..327826a968a8 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -163,7 +163,8 @@ icmp_error_message(struct net *net, struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c7b8bd1d7984..d772dc21857f 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -179,7 +179,8 @@ icmpv6_error_message(struct net *net, } static int -icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, +icmpv6_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index bd831410a396..65351ed5d815 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -799,7 +799,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, * inverse of the return code tells to the netfilter * core what to do with the packet. */ if (l4proto->error != NULL) { - ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo, + pf, hooknum); if (ret <= 0) { NF_CT_STAT_INC_ATOMIC(net, error); NF_CT_STAT_INC_ATOMIC(net, invalid); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index dd375500dccc..9a2815549375 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -561,8 +561,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct net *net, struct sk_buff *skb, - unsigned int dataoff, enum ip_conntrack_info *ctinfo, +static int dccp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ad118053971a..9dd8cd4fb6e6 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -760,7 +760,7 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct net *net, +static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8d38f9a4bed8..8289088b8218 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -91,8 +91,8 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, +static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 0b1bc9ba6678..263b5a72588d 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -89,7 +89,7 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct net *net, +static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, -- cgit v1.2.3 From 5d0aa2ccd4699a01cfdf14886191c249d7b45a01 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 18:13:33 +0100 Subject: netfilter: nf_conntrack: add support for "conntrack zones" Normally, each connection needs a unique identity. Conntrack zones allow to specify a numerical zone using the CT target, connections in different zones can use the same identity. Example: iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1 iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1 Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_CT.h | 2 +- include/net/ip.h | 3 + include/net/ipv6.h | 3 + include/net/netfilter/nf_conntrack.h | 5 +- include/net/netfilter/nf_conntrack_core.h | 3 +- include/net/netfilter/nf_conntrack_expect.h | 9 +- include/net/netfilter/nf_conntrack_extend.h | 2 + include/net/netfilter/nf_conntrack_zones.h | 23 ++++++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 3 +- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 8 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 12 ++- net/ipv4/netfilter/nf_nat_core.c | 24 +++--- net/ipv4/netfilter/nf_nat_pptp.c | 3 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 12 ++- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 8 +- net/netfilter/Kconfig | 13 +++ net/netfilter/nf_conntrack_core.c | 109 +++++++++++++++++++------ net/netfilter/nf_conntrack_expect.c | 21 +++-- net/netfilter/nf_conntrack_h323_main.c | 3 +- net/netfilter/nf_conntrack_netlink.c | 20 ++--- net/netfilter/nf_conntrack_pptp.c | 14 ++-- net/netfilter/nf_conntrack_sip.c | 3 +- net/netfilter/nf_conntrack_standalone.c | 6 ++ net/netfilter/xt_CT.c | 8 +- net/netfilter/xt_connlimit.c | 4 +- 25 files changed, 236 insertions(+), 85 deletions(-) create mode 100644 include/net/netfilter/nf_conntrack_zones.h (limited to 'include') diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h index 7fd0effe1316..1b564106891d 100644 --- a/include/linux/netfilter/xt_CT.h +++ b/include/linux/netfilter/xt_CT.h @@ -5,7 +5,7 @@ struct xt_ct_target_info { u_int16_t flags; - u_int16_t __unused; + u_int16_t zone; u_int32_t ct_events; u_int32_t exp_events; char helper[16]; diff --git a/include/net/ip.h b/include/net/ip.h index fb63371c07a8..7bc47873e3fc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -352,8 +352,11 @@ enum ip_defrag_users { IP_DEFRAG_LOCAL_DELIVER, IP_DEFRAG_CALL_RA_CHAIN, IP_DEFRAG_CONNTRACK_IN, + __IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP_DEFRAG_CONNTRACK_OUT, + __IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, IP_DEFRAG_VS_IN, IP_DEFRAG_VS_OUT, IP_DEFRAG_VS_FWD diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 299bbf5adfb6..639ec53ea081 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -355,8 +355,11 @@ struct inet_frag_queue; enum ip6_defrag_users { IP6_DEFRAG_LOCAL_DELIVER, IP6_DEFRAG_CONNTRACK_IN, + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX, IP6_DEFRAG_CONNTRACK_OUT, + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX, IP6_DEFRAG_CONNTRACK_BRIDGE_IN, + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX, }; struct ip6_create_arg { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5b7d8835523f..bde095f7e845 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -198,7 +198,8 @@ extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int null extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); extern struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); extern void nf_ct_delete_from_lists(struct nf_conn *ct); @@ -267,7 +268,7 @@ extern void nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data); extern void nf_conntrack_free(struct nf_conn *ct); extern struct nf_conn * -nf_conntrack_alloc(struct net *net, +nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 5a449b44ba33..dffde8e6920e 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -49,7 +49,8 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, /* Find a connection corresponding to a tuple. */ extern struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); extern int __nf_conntrack_confirm(struct sk_buff *skb); diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4b47ec19ef39..11e815084fcf 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -74,13 +74,16 @@ int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple); +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple); +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple); void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index e192dc17c583..2d2a1f9a61d8 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -8,6 +8,7 @@ enum nf_ct_ext_id { NF_CT_EXT_NAT, NF_CT_EXT_ACCT, NF_CT_EXT_ECACHE, + NF_CT_EXT_ZONE, NF_CT_EXT_NUM, }; @@ -15,6 +16,7 @@ enum nf_ct_ext_id { #define NF_CT_EXT_NAT_TYPE struct nf_conn_nat #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache +#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone /* Extensions: optional stuff which isn't permanently in struct. */ struct nf_ct_ext { diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h new file mode 100644 index 000000000000..0bbb2bd51e89 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -0,0 +1,23 @@ +#ifndef _NF_CONNTRACK_ZONES_H +#define _NF_CONNTRACK_ZONES_H + +#include + +#define NF_CT_DEFAULT_ZONE 0 + +struct nf_conntrack_zone { + u16 id; +}; + +static inline u16 nf_ct_zone(const struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone *nf_ct_zone; + nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + if (nf_ct_zone) + return nf_ct_zone->id; +#endif + return NF_CT_DEFAULT_ZONE; +} + +#endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d1ea38a7c490..2bb1f87051c4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(sock_net(sk), &tuple); + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 327826a968a8..7404bde95994 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct net *net, struct sk_buff *skb, +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; const struct nf_conntrack_l4proto *innerproto; const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &innertuple); + h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -209,7 +211,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(net, skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index f6f46686cbc0..d498a704d456 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -39,15 +40,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP_DEFRAG_CONNTRACK_IN; + return IP_DEFRAG_CONNTRACK_IN + zone; else - return IP_DEFRAG_CONNTRACK_OUT; + return IP_DEFRAG_CONNTRACK_OUT + zone; } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 26066a2327ad..4595281c2863 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,6 +30,7 @@ #include #include #include +#include static DEFINE_SPINLOCK(nf_nat_lock); @@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { unsigned int hash; /* Original src, to ensure we map it consistently if poss. */ hash = jhash_3words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->src.u.all, + (__force u32)tuple->src.u.all ^ zone, tuple->dst.protonum, 0); return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } @@ -139,12 +141,12 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(struct net *net, +find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, tuple); + unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -152,7 +154,7 @@ find_appropriate_src(struct net *net, rcu_read_lock(); hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; - if (same_src(ct, tuple)) { + if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) { /* Copy source part from reply tuple. */ nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); @@ -175,7 +177,7 @@ find_appropriate_src(struct net *net, the ip with the lowest src-ip/dst-ip/proto usage. */ static void -find_best_ips_proto(struct nf_conntrack_tuple *tuple, +find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, const struct nf_nat_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) @@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, range->flags & IP_NAT_RANGE_PERSISTENT ? - 0 : (__force u32)tuple->dst.u3.ip, 0); + 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } @@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, { struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; + u16 zone = nf_ct_zone(ct); /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(net, orig_tuple, tuple, range)) { + if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, /* 2) Select the least-used IP/proto combination in the given range. */ *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, ct, maniptype); + find_best_ips_proto(zone, tuple, range, ct, maniptype); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ @@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, nf_ct_zone(ct), + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 9eb171056c63..4c060038d29f 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(net, &t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 55ce22e5de49..996c3f41fecd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -191,15 +192,20 @@ out: static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, struct sk_buff *skb) { + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge && skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN; + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; #endif if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN; + return IP6_DEFRAG_CONNTRACK_IN + zone; else - return IP6_DEFRAG_CONNTRACK_OUT; + return IP6_DEFRAG_CONNTRACK_OUT + zone; } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d772dc21857f..9be81776415e 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -128,7 +129,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct net *net, +icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, @@ -137,6 +138,7 @@ icmpv6_error_message(struct net *net, struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; const struct nf_conntrack_l4proto *inproto; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); @@ -163,7 +165,7 @@ icmpv6_error_message(struct net *net, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(net, &intuple); + h = nf_conntrack_find_get(net, zone, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -216,7 +218,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4469d45261f4..18d77b5c351a 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -83,6 +83,19 @@ config NF_CONNTRACK_SECMARK If unsure, say 'N'. +config NF_CONNTRACK_ZONES + bool 'Connection tracking zones' + depends on NETFILTER_ADVANCED + depends on NETFILTER_XT_TARGET_CT + help + This option enables support for connection tracking zones. + Normally, each connection needs to have a unique system wide + identity. Connection tracking zones allow to have multiple + connections using the same identity, as long as they are + contained in different zones. + + If unsure, say `N'. + config NF_CONNTRACK_EVENTS bool "Connection tracking events" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 65351ed5d815..0c9bbe93cc16 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -68,7 +69,7 @@ static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, - unsigned int size, unsigned int rnd) + u16 zone, unsigned int size, unsigned int rnd) { unsigned int n; u_int32_t h; @@ -79,16 +80,16 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, */ n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); h = jhash2((u32 *)tuple, n, - rnd ^ (((__force __u16)tuple->dst.u.all << 16) | - tuple->dst.protonum)); + zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | + tuple->dst.protonum)); return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct net *net, +static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, net->ct.htable_size, + return __hash_conntrack(tuple, zone, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -292,11 +293,12 @@ static void death_by_timeout(unsigned long ul_conntrack) * - Caller must lock nf_conntrack_lock before calling this function */ struct nf_conntrack_tuple_hash * -__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -304,7 +306,8 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) local_bh_disable(); begin: hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuple_equal(tuple, &h->tuple)) { + if (nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { NF_CT_STAT_INC(net, found); local_bh_enable(); return h; @@ -326,21 +329,23 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); begin: - h = __nf_conntrack_find(net, tuple); + h = __nf_conntrack_find(net, zone, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(nf_ct_is_dying(ct) || !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { - if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { + if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || + nf_ct_zone(ct) != zone)) { nf_ct_put(ct); goto begin; } @@ -368,9 +373,11 @@ void nf_conntrack_hash_insert(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; + u16 zone; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -387,6 +394,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; + u16 zone; ct = nf_ct_get(skb, &ctinfo); net = nf_ct_net(ct); @@ -398,8 +406,9 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + zone = nf_ct_zone(ct); + hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -418,11 +427,13 @@ __nf_conntrack_confirm(struct sk_buff *skb) not in the hash. If there is, we lost race. */ hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) + &h->tuple) && + zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) goto out; /* Remove from unconfirmed list */ @@ -469,15 +480,19 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(net, tuple); + struct nf_conn *ct; + u16 zone = nf_ct_zone(ignored_conntrack); + unsigned int hash = hash_conntrack(net, zone, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. */ rcu_read_lock_bh(); hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - nf_ct_tuple_equal(tuple, &h->tuple)) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (ct != ignored_conntrack && + nf_ct_tuple_equal(tuple, &h->tuple) && + nf_ct_zone(ct) == zone) { NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; @@ -540,7 +555,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } -struct nf_conn *nf_conntrack_alloc(struct net *net, +struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) @@ -558,7 +573,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(net, orig); + unsigned int hash = hash_conntrack(net, zone, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -595,13 +610,28 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, #ifdef CONFIG_NET_NS ct->ct_net = net; #endif - +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif /* * changes to lookup keys must be done before setting refcnt to 1 */ smp_wmb(); atomic_set(&ct->ct_general.use, 1); return ct; + +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + return ERR_PTR(-ENOMEM); +#endif } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); @@ -631,13 +661,14 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.\n"); return NULL; } - ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -657,7 +688,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(net, tuple); + exp = nf_ct_find_expectation(net, zone, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -713,6 +744,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, protonum, &tuple, l3proto, @@ -722,7 +754,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, } /* look for tuple match */ - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) { h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff); @@ -958,6 +990,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +#ifdef CONFIG_NF_CONNTRACK_ZONES +static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_zone), + .align = __alignof__(struct nf_conntrack_zone), + .id = NF_CT_EXT_ZONE, +}; +#endif + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -1139,6 +1179,9 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); +#ifdef CONFIG_NF_CONNTRACK_ZONES + nf_ct_extend_unregister(&nf_ct_zone_extend); +#endif } static void nf_conntrack_cleanup_net(struct net *net) @@ -1214,6 +1257,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) unsigned int hashsize, old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; if (current->nsproxy->net_ns != &init_net) return -EOPNOTSUPP; @@ -1240,8 +1284,10 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); + ct = nf_ct_tuplehash_to_ctrack(h); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, + bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), + hashsize, nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } @@ -1299,6 +1345,11 @@ static int nf_conntrack_init_init_net(void) if (ret < 0) goto err_helper; +#ifdef CONFIG_NF_CONNTRACK_ZONES + ret = nf_ct_extend_register(&nf_ct_zone_extend); + if (ret < 0) + goto err_extend; +#endif /* Set up fake conntrack: to never be deleted, not in any hashes */ #ifdef CONFIG_NET_NS nf_conntrack_untracked.ct_net = &init_net; @@ -1309,6 +1360,10 @@ static int nf_conntrack_init_init_net(void) return 0; +#ifdef CONFIG_NF_CONNTRACK_ZONES +err_extend: + nf_conntrack_helper_fini(); +#endif err_helper: nf_conntrack_proto_fini(); err_proto: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 6182fb1b55de..acb29ccaa41f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -27,6 +27,7 @@ #include #include #include +#include unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); @@ -84,7 +85,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; @@ -95,7 +97,8 @@ __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { - if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) return i; } return NULL; @@ -104,12 +107,13 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(net, tuple); + i = __nf_ct_expect_find(net, zone, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -121,7 +125,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, u16 zone, + const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; @@ -133,7 +138,8 @@ nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) h = nf_ct_expect_dst_hash(tuple); hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && - nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) && + nf_ct_zone(i->master) == zone) { exp = i; break; } @@ -204,7 +210,8 @@ static inline int expect_matches(const struct nf_conntrack_expect *a, { return a->master == b->master && a->class == b->class && nf_ct_tuple_equal(&a->tuple, &b->tuple) && - nf_ct_tuple_mask_equal(&a->mask, &b->mask); + nf_ct_tuple_mask_equal(&a->mask, &b->mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } /* Generally a bad idea to call this: could have matched already. */ diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 66369490230e..a1c8dd917e12 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* Parameters */ @@ -1216,7 +1217,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index db35edac307b..51089cfe1167 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -811,7 +811,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -872,7 +872,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, &tuple); + h = nf_conntrack_find_get(net, 0, &tuple); if (!h) return -ENOENT; @@ -1221,7 +1221,7 @@ ctnetlink_create_conntrack(struct net *net, int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(net, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1325,7 +1325,7 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - master_h = nf_conntrack_find_get(net, &master); + master_h = nf_conntrack_find_get(net, 0, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1374,9 +1374,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, &otuple); + h = __nf_conntrack_find(net, 0, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, &rtuple); + h = __nf_conntrack_find(net, 0, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1714,7 +1714,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1770,7 +1770,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, &tuple); + exp = nf_ct_expect_find_get(net, 0, &tuple); if (!exp) return -ENOENT; @@ -1855,7 +1855,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(net, &master_tuple); + h = nf_conntrack_find_get(net, 0, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1912,7 +1912,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, 0, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 3807ac7faf4c..088944824e13 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -123,7 +124,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(net, &inv_t); + exp_other = nf_ct_expect_find_get(net, nf_ct_zone(ct), &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -136,17 +137,18 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(struct net *net, +static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; struct nf_conn *sibling; + u16 zone = nf_ct_zone(ct); pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(net, t); + h = nf_conntrack_find_get(net, zone, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -157,7 +159,7 @@ static int destroy_sibling_or_exp(struct net *net, nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(net, t); + exp = nf_ct_expect_find_get(net, zone, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -182,7 +184,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -190,7 +192,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(net, &t)) + if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index fbe8ff5a420a..8dd75d90efc0 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -23,6 +23,7 @@ #include #include #include +#include #include MODULE_LICENSE("GPL"); @@ -836,7 +837,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int dataoff, rcu_read_lock(); do { - exp = __nf_ct_expect_find(net, &tuple); + exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e310f1561bb2..24a42efe62ef 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -26,6 +26,7 @@ #include #include #include +#include MODULE_LICENSE("GPL"); @@ -171,6 +172,11 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; #endif +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) + goto release; +#endif + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) goto release; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 8183a054256f..61c50fa84703 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -16,6 +16,7 @@ #include #include #include +#include static unsigned int xt_ct_target(struct sk_buff *skb, const struct xt_target_param *par) @@ -69,11 +70,16 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par) goto out; } +#ifndef CONFIG_NF_CONNTRACK_ZONES + if (info->zone) + goto err1; +#endif + if (nf_ct_l3proto_try_module_get(par->family) < 0) goto err1; memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, &t, &t, GFP_KERNEL); + ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); if (IS_ERR(ct)) goto err2; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 0d9d18ea2b09..26997ce90e48 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -28,6 +28,7 @@ #include #include #include +#include /* we will save the tuples of all connections we care about */ struct xt_connlimit_conn { @@ -114,7 +115,8 @@ static int count_them(struct net *net, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = nf_conntrack_find_get(net, &conn->tuple); + found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, + &conn->tuple); found_ct = NULL; if (found != NULL) -- cgit v1.2.3 From ef00f89f1eb7e056aab9dfe068521e6f2320c94a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Feb 2010 18:14:57 +0100 Subject: netfilter: ctnetlink: add zone support Parse and dump the conntrack zone in ctnetlink. Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 2 + net/netfilter/nf_conntrack_netlink.c | 92 +++++++++++++++++++++------ 2 files changed, 75 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index ed4ef8d0b11b..9ed534c991b9 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -40,6 +40,7 @@ enum ctattr_type { CTA_NAT_SEQ_ADJ_ORIG, CTA_NAT_SEQ_ADJ_REPLY, CTA_SECMARK, + CTA_ZONE, __CTA_MAX }; #define CTA_MAX (__CTA_MAX - 1) @@ -159,6 +160,7 @@ enum ctattr_expect { CTA_EXPECT_TIMEOUT, CTA_EXPECT_ID, CTA_EXPECT_HELP_NAME, + CTA_EXPECT_ZONE, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 51089cfe1167..8b05f364b2f2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -379,6 +380,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (nf_ct_zone(ct)) + NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || @@ -517,6 +521,9 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; nla_nest_end(skb, nest_parms); + if (nf_ct_zone(ct)) + NLA_PUT_BE16(skb, CTA_ZONE, htons(nf_ct_zone(ct))); + if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; @@ -750,6 +757,21 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], return 0; } +static int +ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone) +{ + if (attr) +#ifdef CONFIG_NF_CONNTRACK_ZONES + *zone = ntohs(nla_get_be16(attr)); +#else + return -EOPNOTSUPP; +#endif + else + *zone = 0; + + return 0; +} + static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { [CTA_HELP_NAME] = { .type = NLA_NUL_STRING }, }; @@ -781,6 +803,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { [CTA_ID] = { .type = NLA_U32 }, [CTA_NAT_DST] = { .type = NLA_NESTED }, [CTA_TUPLE_MASTER] = { .type = NLA_NESTED }, + [CTA_ZONE] = { .type = NLA_U16 }, }; static int @@ -794,7 +817,12 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); @@ -811,7 +839,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, 0, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) return -ENOENT; @@ -856,12 +884,17 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2 = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (nlh->nlmsg_flags & NLM_F_DUMP) return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, ctnetlink_done); + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; + if (cda[CTA_TUPLE_ORIG]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3); else if (cda[CTA_TUPLE_REPLY]) @@ -872,7 +905,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(net, 0, &tuple); + h = nf_conntrack_find_get(net, zone, &tuple); if (!h) return -ENOENT; @@ -1211,7 +1244,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, } static struct nf_conn * -ctnetlink_create_conntrack(struct net *net, +ctnetlink_create_conntrack(struct net *net, u16 zone, const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, @@ -1221,7 +1254,7 @@ ctnetlink_create_conntrack(struct net *net, int err = -EINVAL; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(net, 0, otuple, rtuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, zone, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) return ERR_PTR(-ENOMEM); @@ -1325,7 +1358,7 @@ ctnetlink_create_conntrack(struct net *net, if (err < 0) goto err2; - master_h = nf_conntrack_find_get(net, 0, &master); + master_h = nf_conntrack_find_get(net, zone, &master); if (master_h == NULL) { err = -ENOENT; goto err2; @@ -1358,7 +1391,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple_hash *h = NULL; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; + + err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); + if (err < 0) + return err; if (cda[CTA_TUPLE_ORIG]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3); @@ -1374,9 +1412,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(net, 0, &otuple); + h = __nf_conntrack_find(net, zone, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(net, 0, &rtuple); + h = __nf_conntrack_find(net, zone, &rtuple); if (h == NULL) { err = -ENOENT; @@ -1384,7 +1422,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_events events; - ct = ctnetlink_create_conntrack(net, cda, &otuple, + ct = ctnetlink_create_conntrack(net, zone, cda, &otuple, &rtuple, u3); if (IS_ERR(ct)) { err = PTR_ERR(ct); @@ -1698,7 +1736,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct sk_buff *skb2; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { return netlink_dump_start(ctnl, skb, nlh, @@ -1706,6 +1745,10 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, ctnetlink_exp_done); } + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else @@ -1714,7 +1757,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(net, 0, &tuple); + exp = nf_ct_expect_find_get(net, zone, &tuple); if (!exp) return -ENOENT; @@ -1761,16 +1804,21 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; + u16 zone; int err; if (cda[CTA_EXPECT_TUPLE]) { /* delete a single expect by tuple */ + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(net, 0, &tuple); + exp = nf_ct_expect_find_get(net, zone, &tuple); if (!exp) return -ENOENT; @@ -1832,7 +1880,8 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, } static int -ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], +ctnetlink_create_expect(struct net *net, u16 zone, + const struct nlattr * const cda[], u_int8_t u3, u32 pid, int report) { @@ -1855,7 +1904,7 @@ ctnetlink_create_expect(struct net *net, const struct nlattr * const cda[], return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(net, 0, &master_tuple); + h = nf_conntrack_find_get(net, zone, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1900,25 +1949,30 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; - int err = 0; + u16 zone; + int err; if (!cda[CTA_EXPECT_TUPLE] || !cda[CTA_EXPECT_MASK] || !cda[CTA_EXPECT_MASTER]) return -EINVAL; + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); if (err < 0) return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(net, 0, &tuple); + exp = __nf_ct_expect_find(net, zone, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) { - err = ctnetlink_create_expect(net, cda, + err = ctnetlink_create_expect(net, zone, cda, u3, NETLINK_CB(skb).pid, nlmsg_report(nlh)); -- cgit v1.2.3 From 3e5e524ffb5fcf2447eb5dd9f8e54ad22dd9baa7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 15 Feb 2010 18:17:10 +0100 Subject: netfilter: CONFIG_COMPAT: allow delta to exceed 32767 with 32 bit userland and 64 bit kernels, it is unlikely but possible that insertion of new rules fails even tough there are only about 2000 iptables rules. This happens because the compat delta is using a short int. Easily reproducible via "iptables -m limit" ; after about 2050 rules inserting new ones fails with -ELOOP. Note that compat_delta included 2 bytes of padding on x86_64, so structure size remains the same. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 +- net/netfilter/x_tables.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index c61758f4be31..a18119fb88f0 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -588,7 +588,7 @@ extern void xt_compat_unlock(u_int8_t af); extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); extern void xt_compat_flush_offsets(u_int8_t af); -extern short xt_compat_calc_jump(u_int8_t af, unsigned int offset); +extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); extern int xt_compat_match_offset(const struct xt_match *match); extern int xt_compat_match_from_user(struct xt_entry_match *m, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 69c56287d518..0a12cedfe9e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); struct compat_delta { struct compat_delta *next; unsigned int offset; - short delta; + int delta; }; struct xt_af { @@ -439,10 +439,10 @@ void xt_compat_flush_offsets(u_int8_t af) } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); -short xt_compat_calc_jump(u_int8_t af, unsigned int offset) +int xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp; - short delta; + int delta; for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) if (tmp->offset < offset) -- cgit v1.2.3