From 8b13eddfdf04cbfa561725cfc42d6868fe896f56 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Thu, 16 Oct 2014 12:23:29 +0200 Subject: netfilter: refactor NAT redirect IPv4 to use it from nf_tables This patch refactors the IPv4 code so it can be usable both from xt and nf_tables. A similar patch follows-up to handle IPv6. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 1 + net/netfilter/xt_REDIRECT.c | 44 ++------------------------------------------ 2 files changed, 3 insertions(+), 42 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ae5096ab65eb..a0716a3f08b0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -835,6 +835,7 @@ config NETFILTER_XT_TARGET_RATEEST config NETFILTER_XT_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT + select NF_NAT_REDIRECT_IPV4 ---help--- REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index 22a10309297c..b4ffac5fe8e9 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -26,6 +26,7 @@ #include #include #include +#include static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; @@ -98,48 +99,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par) static unsigned int redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) { - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = htonl(0x7F000001); - else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); - if (indev && (ifa = indev->ifa_list)) - newdst = ifa->ifa_local; - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum); } static struct xt_target redirect_tg_reg[] __read_mostly = { -- cgit v1.2.3 From 9de920eddb74bf67f1d6af603acc5ed05dcd35e9 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 17 Oct 2014 12:37:52 +0200 Subject: netfilter: refactor NAT redirect IPv6 code to use it from nf_tables This patch refactors the IPv6 code so it can be usable both from xt and nf_tables. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_nat_redirect.h | 8 +++ net/ipv6/netfilter/Kconfig | 6 +++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nf_nat_redirect_ipv6.c | 75 ++++++++++++++++++++++++++++ net/netfilter/Kconfig | 1 + net/netfilter/xt_REDIRECT.c | 40 +-------------- 6 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 include/net/netfilter/ipv6/nf_nat_redirect.h create mode 100644 net/ipv6/netfilter/nf_nat_redirect_ipv6.c (limited to 'net/netfilter') diff --git a/include/net/netfilter/ipv6/nf_nat_redirect.h b/include/net/netfilter/ipv6/nf_nat_redirect.h new file mode 100644 index 000000000000..1ebdffc461cc --- /dev/null +++ b/include/net/netfilter/ipv6/nf_nat_redirect.h @@ -0,0 +1,8 @@ +#ifndef _NF_NAT_REDIRECT_IPV6_H_ +#define _NF_NAT_REDIRECT_IPV6_H_ + +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum); + +#endif /* _NF_NAT_REDIRECT_IPV6_H_ */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6af874fc187f..462eebbf4377 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -82,6 +82,12 @@ config NF_NAT_MASQUERADE_IPV6 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. +config NF_NAT_REDIRECT_IPV6 + tristate "IPv6 redirect support" + help + This is the kernel functionality to provide NAT in the redirect + flavour (redirect packet to local machine) for IPv6. + config NFT_MASQ_IPV6 tristate "IPv6 masquerade support for nf_tables" depends on NF_TABLES_IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbb25f01143c..6c2baab10f21 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o +obj-$(CONFIG_NF_NAT_REDIRECT_IPV6) += nf_nat_redirect_ipv6.o # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c new file mode 100644 index 000000000000..ea1308aeb048 --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c @@ -0,0 +1,75 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum) +{ + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (hooknum == NF_INET_LOCAL_OUT) { + newdst = loopback_addr; + } else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index a0716a3f08b0..49deb4edbac6 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -836,6 +836,7 @@ config NETFILTER_XT_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT select NF_NAT_REDIRECT_IPV4 + select NF_NAT_REDIRECT_IPV6 if IP6_NF_IPTABLES ---help--- REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index b4ffac5fe8e9..b6ec67efd900 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -27,48 +27,12 @@ #include #include #include - -static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; +#include static unsigned int redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct nf_nat_range *range = par->targinfo; - struct nf_nat_range newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = loopback_addr; - else { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - bool addr = false; - - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - if (idev != NULL) { - list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; - addr = true; - break; - } - } - rcu_read_unlock(); - - if (!addr) - return NF_DROP; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); + return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum); } static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) -- cgit v1.2.3 From e9105f1bead4ec3f64904564c7c6268185d6b363 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 17 Oct 2014 12:39:09 +0200 Subject: netfilter: nf_tables: add new expression nft_redir This new expression provides NAT in the redirect flavour, which is to redirect packets to local machine. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_redir.h | 21 +++++++ include/uapi/linux/netfilter/nf_tables.h | 16 ++++++ net/ipv4/netfilter/Kconfig | 9 +++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/nft_redir_ipv4.c | 77 +++++++++++++++++++++++++ net/ipv6/netfilter/Kconfig | 9 +++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_redir_ipv6.c | 77 +++++++++++++++++++++++++ net/netfilter/Kconfig | 9 +++ net/netfilter/Makefile | 1 + net/netfilter/nft_redir.c | 98 ++++++++++++++++++++++++++++++++ 11 files changed, 319 insertions(+) create mode 100644 include/net/netfilter/nft_redir.h create mode 100644 net/ipv4/netfilter/nft_redir_ipv4.c create mode 100644 net/ipv6/netfilter/nft_redir_ipv6.c create mode 100644 net/netfilter/nft_redir.c (limited to 'net/netfilter') diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h new file mode 100644 index 000000000000..a2d67546afab --- /dev/null +++ b/include/net/netfilter/nft_redir.h @@ -0,0 +1,21 @@ +#ifndef _NFT_REDIR_H_ +#define _NFT_REDIR_H_ + +struct nft_redir { + enum nft_registers sreg_proto_min:8; + enum nft_registers sreg_proto_max:8; + u16 flags; +}; + +extern const struct nla_policy nft_redir_policy[]; + +int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]); + +int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr); + +int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data); + +#endif /* _NFT_REDIR_H_ */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index f31fe7b660a5..16f62a5cf04d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -837,6 +837,22 @@ enum nft_masq_attributes { }; #define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1) +/** + * enum nft_redir_attributes - nf_tables redirect expression netlink attributes + * + * @NFTA_REDIR_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers) + * @NFTA_REDIR_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers) + * @NFTA_REDIR_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32) + */ +enum nft_redir_attributes { + NFTA_REDIR_UNSPEC, + NFTA_REDIR_REG_PROTO_MIN, + NFTA_REDIR_REG_PROTO_MAX, + NFTA_REDIR_FLAGS, + __NFTA_REDIR_MAX +}; +#define NFTA_REDIR_MAX (__NFTA_REDIR_MAX - 1) + /** * enum nft_gen_attributes - nf_tables ruleset generation attributes * diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index a300e2c32b26..8358b2da1549 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -119,6 +119,15 @@ config NFT_MASQ_IPV4 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV4 + tristate "IPv4 redirect support for nf_tables" + depends on NF_TABLES_IPV4 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV4 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + config NF_NAT_SNMP_BASIC tristate "Basic SNMP-ALG support" depends on NF_CONNTRACK_SNMP diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 34e436c92015..902bcd1597bb 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o +obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c new file mode 100644 index 000000000000..643c5967aa27 --- /dev/null +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_redir_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_ipv4_multi_range_compat mr; + unsigned int verdict; + + memset(&mr, 0, sizeof(mr)); + if (priv->sreg_proto_min) { + mr.range[0].min.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + mr.range[0].max.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + mr.range[0].flags |= priv->flags; + + verdict = nf_nat_redirect_ipv4(pkt->skb, &mr, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv4_type; +static const struct nft_expr_ops nft_redir_ipv4_ops = { + .type = &nft_redir_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv4_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "redir", + .ops = &nft_redir_ipv4_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv4_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv4_type); +} + +static void __exit nft_redir_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv4_type); +} + +module_init(nft_redir_ipv4_module_init); +module_exit(nft_redir_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 462eebbf4377..0dbe5c7953e5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -97,6 +97,15 @@ config NFT_MASQ_IPV6 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV6 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 6c2baab10f21..d2ac9f5f212c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o +obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c new file mode 100644 index 000000000000..83420eeaad1c --- /dev/null +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv6_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv6_type); +} + +static void __exit nft_redir_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv6_type); +} + +module_init(nft_redir_ipv6_module_init); +module_exit(nft_redir_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 49deb4edbac6..373486ae4159 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -505,6 +505,15 @@ config NFT_MASQ This option adds the "masquerade" expression that you can use to perform NAT in the masquerade flavour. +config NFT_REDIR + depends on NF_TABLES + depends on NF_CONNTRACK + depends on NF_NAT + tristate "Netfilter nf_tables redirect support" + help + This options adds the "redirect" expression that you can use + to perform NAT in the redirect flavour. + config NFT_NAT depends on NF_TABLES depends on NF_CONNTRACK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index a9571be3f791..f3eb4680f2ec 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o +obj-$(CONFIG_NFT_REDIR) += nft_redir.o # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c new file mode 100644 index 000000000000..e27b4e35718a --- /dev/null +++ b/net/netfilter/nft_redir.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = { + [NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 }, + [NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 }, + [NFTA_REDIR_FLAGS] = { .type = NLA_U32 }, +}; +EXPORT_SYMBOL_GPL(nft_redir_policy); + +int nft_redir_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_redir *priv = nft_expr_priv(expr); + u32 nla_be32; + int err; + + err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); + if (err < 0) + return err; + + if (tb[NFTA_REDIR_REG_PROTO_MIN]) { + nla_be32 = nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN]); + priv->sreg_proto_min = ntohl(nla_be32); + err = nft_validate_input_register(priv->sreg_proto_min); + if (err < 0) + return err; + + if (tb[NFTA_REDIR_REG_PROTO_MAX]) { + nla_be32 = nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX]); + priv->sreg_proto_max = ntohl(nla_be32); + err = nft_validate_input_register(priv->sreg_proto_max); + if (err < 0) + return err; + } else { + priv->sreg_proto_max = priv->sreg_proto_min; + } + } + + if (tb[NFTA_REDIR_FLAGS]) { + priv->flags = ntohl(nla_get_be32(tb[NFTA_REDIR_FLAGS])); + if (priv->flags & ~NF_NAT_RANGE_MASK) + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(nft_redir_init); + +int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_redir *priv = nft_expr_priv(expr); + + if (priv->sreg_proto_min) { + if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN, + htonl(priv->sreg_proto_min))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX, + htonl(priv->sreg_proto_max))) + goto nla_put_failure; + } + + if (priv->flags != 0 && + nla_put_be32(skb, NFTA_REDIR_FLAGS, htonl(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} +EXPORT_SYMBOL_GPL(nft_redir_dump); + +int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); +} +EXPORT_SYMBOL_GPL(nft_redir_validate); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); -- cgit v1.2.3 From d7701089118d23bfed03bad0a6b5cc5115990c9e Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Mon, 6 Oct 2014 13:30:08 -0700 Subject: ipvs: remove unnecessary assignment in __ip_vs_get_out_rt It is a precondition of the function that daddr be equal to dest->addr.ip if dest is non-NULL, so this additional assignment is just confusing for stupid engineers like me. Signed-off-by: Alex Gartrell Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 91f17c1eb8a2..5efa59792505 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -293,7 +293,6 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, &dest->addr.ip, &dest_dst->dst_saddr.ip, atomic_read(&rt->dst.__refcnt)); } - daddr = dest->addr.ip; if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { -- cgit v1.2.3 From 0c26ed1c07f13ca27e2638ffdd1951013ed96c48 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Wed, 29 Oct 2014 10:04:51 -0200 Subject: netfilter: nf_log: Introduce nft_log_dereference() macro Wrap up a common call pattern in an easier to handle call. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index d7197649dba6..5eaf047ed37f 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -19,6 +19,9 @@ static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); +#define nft_log_dereference(logger) \ + rcu_dereference_protected(logger, lockdep_is_held(&nf_log_mutex)) + static struct nf_logger *__find_logger(int pf, const char *str_logger) { struct nf_logger *log; @@ -28,8 +31,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) if (loggers[pf][i] == NULL) continue; - log = rcu_dereference_protected(loggers[pf][i], - lockdep_is_held(&nf_log_mutex)); + log = nft_log_dereference(loggers[pf][i]); if (!strncasecmp(str_logger, log->name, strlen(log->name))) return log; } @@ -45,8 +47,7 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) return; mutex_lock(&nf_log_mutex); - log = rcu_dereference_protected(net->nf.nf_loggers[pf], - lockdep_is_held(&nf_log_mutex)); + log = nft_log_dereference(net->nf.nf_loggers[pf]); if (log == NULL) rcu_assign_pointer(net->nf.nf_loggers[pf], logger); @@ -61,8 +62,7 @@ void nf_log_unset(struct net *net, const struct nf_logger *logger) mutex_lock(&nf_log_mutex); for (i = 0; i < NFPROTO_NUMPROTO; i++) { - log = rcu_dereference_protected(net->nf.nf_loggers[i], - lockdep_is_held(&nf_log_mutex)); + log = nft_log_dereference(net->nf.nf_loggers[i]); if (log == logger) RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); } @@ -297,8 +297,7 @@ static int seq_show(struct seq_file *s, void *v) int i, ret; struct net *net = seq_file_net(s); - logger = rcu_dereference_protected(net->nf.nf_loggers[*pos], - lockdep_is_held(&nf_log_mutex)); + logger = nft_log_dereference(net->nf.nf_loggers[*pos]); if (!logger) ret = seq_printf(s, "%2lld NONE (", *pos); @@ -312,8 +311,7 @@ static int seq_show(struct seq_file *s, void *v) if (loggers[*pos][i] == NULL) continue; - logger = rcu_dereference_protected(loggers[*pos][i], - lockdep_is_held(&nf_log_mutex)); + logger = nft_log_dereference(loggers[*pos][i]); ret = seq_printf(s, "%s", logger->name); if (ret < 0) return ret; @@ -385,8 +383,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, mutex_unlock(&nf_log_mutex); } else { mutex_lock(&nf_log_mutex); - logger = rcu_dereference_protected(net->nf.nf_loggers[tindex], - lockdep_is_held(&nf_log_mutex)); + logger = nft_log_dereference(net->nf.nf_loggers[tindex]); if (!logger) table->data = "NONE"; else -- cgit v1.2.3 From 8ac2bde2a4a05c38e2bd733bea94507cb1461e06 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Wed, 29 Oct 2014 10:51:13 -0200 Subject: netfilter: log: protect nf_log_register against double registering Currently, despite the comment right before the function, nf_log_register allows registering two loggers on with the same type and end up overwriting the previous register. Not a real issue today as current tree doesn't have two loggers for the same type but it's better to get this protected. Also make sure that all of its callers do error checking. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_log_arp.c | 12 +++++++++++- net/ipv4/netfilter/nf_log_ipv4.c | 12 +++++++++++- net/ipv6/netfilter/nf_log_ipv6.c | 12 +++++++++++- net/netfilter/nf_log.c | 16 +++++++++++++--- 4 files changed, 46 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index ccfc78db12ee..0c8799a0c9e4 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -10,6 +10,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -130,8 +131,17 @@ static int __init nf_log_arp_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_ARP, &nf_arp_logger); + ret = nf_log_register(NFPROTO_ARP, &nf_arp_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_arp_net_ops); + return ret; } static void __exit nf_log_arp_exit(void) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 078bdca1b607..75101980eeee 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -366,8 +367,17 @@ static int __init nf_log_ipv4_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + ret = nf_log_register(NFPROTO_IPV4, &nf_ip_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv4_net_ops); + return ret; } static void __exit nf_log_ipv4_exit(void) diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 7b17a0be93e7..7fc34d1681a1 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -398,8 +399,17 @@ static int __init nf_log_ipv6_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + return ret; } static void __exit nf_log_ipv6_exit(void) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 5eaf047ed37f..9562e393fdf7 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -75,6 +75,7 @@ EXPORT_SYMBOL(nf_log_unset); int nf_log_register(u_int8_t pf, struct nf_logger *logger) { int i; + int ret = 0; if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers)) return -EINVAL; @@ -82,16 +83,25 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + if (rcu_access_pointer(loggers[i][logger->type])) { + ret = -EEXIST; + goto unlock; + } + } for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) rcu_assign_pointer(loggers[i][logger->type], logger); } else { - /* register at end of list to honor first register win */ + if (rcu_access_pointer(loggers[pf][logger->type])) { + ret = -EEXIST; + goto unlock; + } rcu_assign_pointer(loggers[pf][logger->type], logger); } +unlock: mutex_unlock(&nf_log_mutex); - - return 0; + return ret; } EXPORT_SYMBOL(nf_log_register); -- cgit v1.2.3 From 01cfa0a4ed82132abb6dc7bcb126eaf499cb8af0 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 29 Oct 2014 22:57:19 -0700 Subject: netfilter: fix spelling errors Signed-off-by: Stephen Hemminger Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_tables_api.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 5b3eae7d4c9a..bd9d31537905 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -250,7 +250,7 @@ out: } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); -/* appropiate ct lock protecting must be taken by caller */ +/* appropriate ct lock protecting must be taken by caller */ static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65eb2a1160d5..1ffb253c6a77 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2477,7 +2477,7 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, const struct nfgenmsg *nfmsg = nlmsg_data(nlh); int err; - /* Verify existance before starting dump */ + /* Verify existence before starting dump */ err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla); if (err < 0) return err; -- cgit v1.2.3 From c5a589cc3034d035e8490216a45abd3a3b3cd85e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 30 Oct 2014 18:39:12 +0100 Subject: netfilter: nf_log: fix sparse warning in nf_logger_find_get() net/netfilter/nf_log.c:157:16: warning: incorrect type in assignment (different address spaces) net/netfilter/nf_log.c:157:16: expected struct nf_logger *logger net/netfilter/nf_log.c:157:16: got struct nf_logger [noderef] * Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9562e393fdf7..49a64174f3f1 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -154,8 +154,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; - logger = loggers[pf][type]; - if (logger == NULL) + if (rcu_access_pointer(loggers[pf][type]) == NULL) request_module("nf-logger-%u-%u", pf, type); rcu_read_lock(); -- cgit v1.2.3 From ce674173e9f4ef7fd0dc04ea0773cdedfbf8e366 Mon Sep 17 00:00:00 2001 From: Ana Rey Date: Mon, 3 Nov 2014 18:10:50 +0100 Subject: netfilter: nft_meta: add cgroup support This allows you to filter traffic by process control group (cgroup). Signed-off-by: Ana Rey Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_meta.c | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 16f62a5cf04d..832bc46db78b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -579,6 +579,7 @@ enum nft_exthdr_attributes { * @NFT_META_CPU: cpu id through smp_processor_id() * @NFT_META_IIFGROUP: packet input interface group * @NFT_META_OIFGROUP: packet output interface group + * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid) */ enum nft_meta_keys { NFT_META_LEN, @@ -604,6 +605,7 @@ enum nft_meta_keys { NFT_META_CPU, NFT_META_IIFGROUP, NFT_META_OIFGROUP, + NFT_META_CGROUP, }; /** diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 1e7c076ca63a..e99911eda915 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -165,6 +165,12 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; dest->data[0] = out->group; break; + case NFT_META_CGROUP: + if (skb->sk == NULL) + break; + + dest->data[0] = skb->sk->sk_classid; + break; default: WARN_ON(1); goto err; @@ -240,6 +246,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_CPU: case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: + case NFT_META_CGROUP: break; default: return -EOPNOTSUPP; -- cgit v1.2.3 From f6c6339d5e945975066cb452c78a7cbe89413f37 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 12 Nov 2014 11:50:16 +0100 Subject: netfilter: fix unmet dependencies in NETFILTER_XT_TARGET_REDIRECT warning: (NETFILTER_XT_TARGET_REDIRECT) selects NF_NAT_REDIRECT_IPV4 which has unmet direct dependencies (NET && INET && NETFILTER && NF_NAT_IPV4) warning: (NETFILTER_XT_TARGET_REDIRECT) selects NF_NAT_REDIRECT_IPV6 which has unmet direct dependencies (NET && INET && IPV6 && NETFILTER && NF_NAT_IPV6) Fixes: 8b13edd ("netfilter: refactor NAT redirect IPv4 to use it from nf_tables") Fixes: 9de920e ("netfilter: refactor NAT redirect IPv6 code to use it from nf_tables") Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 373486ae4159..57f15a9aa481 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -844,8 +844,8 @@ config NETFILTER_XT_TARGET_RATEEST config NETFILTER_XT_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT - select NF_NAT_REDIRECT_IPV4 - select NF_NAT_REDIRECT_IPV6 if IP6_NF_IPTABLES + select NF_NAT_REDIRECT_IPV4 if NF_NAT_IPV4 + select NF_NAT_REDIRECT_IPV6 if NF_NAT_IPV6 ---help--- REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to -- cgit v1.2.3 From baf4750d92cdfb63d6535f7166c6189d07407b65 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 12 Nov 2014 11:56:47 +0100 Subject: netfilter: nft_redir: fix sparse warnings >> net/netfilter/nft_redir.c:39:26: sparse: incorrect type in assignment (different base types) net/netfilter/nft_redir.c:39:26: expected unsigned int [unsigned] [usertype] nla_be32 net/netfilter/nft_redir.c:39:26: got restricted __be32 >> net/netfilter/nft_redir.c:40:40: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:40:40: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:40:40: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:40:40: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:40:40: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:40:40: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:46:34: sparse: incorrect type in assignment (different base types) net/netfilter/nft_redir.c:46:34: expected unsigned int [unsigned] [usertype] nla_be32 net/netfilter/nft_redir.c:46:34: got restricted __be32 >> net/netfilter/nft_redir.c:47:48: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:47:48: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:47:48: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:47:48: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:47:48: sparse: cast to restricted __be32 >> net/netfilter/nft_redir.c:47:48: sparse: cast to restricted __be32 Fixes: e9105f1 ("netfilter: nf_tables: add new expression nft_redir") Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index e27b4e35718a..9e8093f28311 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -28,7 +28,6 @@ int nft_redir_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_redir *priv = nft_expr_priv(expr); - u32 nla_be32; int err; err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT); @@ -36,15 +35,17 @@ int nft_redir_init(const struct nft_ctx *ctx, return err; if (tb[NFTA_REDIR_REG_PROTO_MIN]) { - nla_be32 = nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN]); - priv->sreg_proto_min = ntohl(nla_be32); + priv->sreg_proto_min = + ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN])); + err = nft_validate_input_register(priv->sreg_proto_min); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { - nla_be32 = nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX]); - priv->sreg_proto_max = ntohl(nla_be32); + priv->sreg_proto_max = + ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX])); + err = nft_validate_input_register(priv->sreg_proto_max); if (err < 0) return err; -- cgit v1.2.3 From 56768644317c7746cb63f61573fcdc2355885707 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Nov 2014 10:04:16 +0100 Subject: netfilter: fix various sparse warnings net/bridge/br_netfilter.c:870:6: symbol 'br_netfilter_enable' was not declared. Should it be static? no; add include net/ipv4/netfilter/nft_reject_ipv4.c:22:6: symbol 'nft_reject_ipv4_eval' was not declared. Should it be static? yes net/ipv6/netfilter/nf_reject_ipv6.c:16:6: symbol 'nf_send_reset6' was not declared. Should it be static? no; add include net/ipv6/netfilter/nft_reject_ipv6.c:22:6: symbol 'nft_reject_ipv6_eval' was not declared. Should it be static? yes net/netfilter/core.c:33:32: symbol 'nf_ipv6_ops' was not declared. Should it be static? no; add include net/netfilter/xt_DSCP.c:40:57: cast truncates bits from constant value (ffffff03 becomes 3) net/netfilter/xt_DSCP.c:57:59: cast truncates bits from constant value (ffffff03 becomes 3) add __force, 3 is what we want. net/ipv4/netfilter/nf_log_arp.c:77:6: symbol 'nf_log_arp_packet' was not declared. Should it be static? yes net/ipv4/netfilter/nf_reject_ipv4.c:17:6: symbol 'nf_send_reset' was not declared. Should it be static? no; add include Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 1 + net/ipv4/netfilter/nf_log_arp.c | 12 ++++++------ net/ipv4/netfilter/nf_reject_ipv4.c | 1 + net/ipv4/netfilter/nft_reject_ipv4.c | 7 +++---- net/ipv6/netfilter/nf_reject_ipv6.c | 1 + net/ipv6/netfilter/nft_reject_ipv6.c | 7 +++---- net/netfilter/core.c | 1 + net/netfilter/xt_DSCP.c | 6 ++++-- 8 files changed, 20 insertions(+), 16 deletions(-) (limited to 'net/netfilter') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1bada53bb195..f81dc33bf8a1 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "br_private.h" diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 0c8799a0c9e4..d059182c1466 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -75,12 +75,12 @@ static void dump_arp_packet(struct nf_log_buf *m, ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst); } -void nf_log_arp_packet(struct net *net, u_int8_t pf, - unsigned int hooknum, const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) +static void nf_log_arp_packet(struct net *net, u_int8_t pf, + unsigned int hooknum, const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *loginfo, + const char *prefix) { struct nf_log_buf *m; diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 92b303dbd5fc..cdcb9a5d6786 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* Send RST reply */ diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index ed33299c56d1..d729542bd1b7 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -19,9 +19,9 @@ #include #include -void nft_reject_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -36,7 +36,6 @@ void nft_reject_ipv4_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval); static struct nft_expr_type nft_reject_ipv4_type; static const struct nft_expr_ops nft_reject_ipv4_ops = { diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 20d9defc6c59..87576ff15e1b 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -11,6 +11,7 @@ #include #include #include +#include #include void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 0bc19fa87821..f73285924144 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -19,9 +19,9 @@ #include #include -void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -38,7 +38,6 @@ void nft_reject_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); static struct nft_expr_type nft_reject_ipv6_type; static const struct nft_expr_ops nft_reject_ipv6_ops = { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 024a2e25c8a4..fea9ef566427 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index ae8271652efa..3f83d38c4e5b 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -37,7 +37,8 @@ dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, sizeof(struct iphdr))) return NF_DROP; - ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK), + ipv4_change_dsfield(ip_hdr(skb), + (__force __u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } @@ -54,7 +55,8 @@ dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) return NF_DROP; - ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK), + ipv6_change_dsfield(ipv6_hdr(skb), + (__force __u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; -- cgit v1.2.3 From 8225161545a67bdb68cf86beafcdce1604720605 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 6 Nov 2014 12:32:28 +0100 Subject: netfilter: nfnetlink_log: remove unnecessary error messages In case of OOM, there's nothing userspace can do. If there's no room to put the payload in __build_packet_message(), jump to nla_put_failure which already performs the corresponding error reporting. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1e3a0579416..51996ece3d2a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -334,9 +334,6 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, skb = nfnetlink_alloc_skb(net, pkt_size, peer_portid, GFP_ATOMIC); - if (!skb) - pr_err("nfnetlink_log: can't even alloc %u bytes\n", - pkt_size); } } @@ -568,10 +565,8 @@ __build_packet_message(struct nfnl_log_net *log, struct nlattr *nla; int size = nla_attr_size(data_len); - if (skb_tailroom(inst->skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nfnetlink_log: no tailroom!\n"); - return -1; - } + if (skb_tailroom(inst->skb) < nla_total_size(data_len)) + goto nla_put_failure; nla = (struct nlattr *)skb_put(inst->skb, nla_total_size(data_len)); nla->nla_type = NFULA_PAYLOAD; -- cgit v1.2.3 From 1f501d62523f0d5fd152006e16938ee674932c1b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Nov 2014 18:11:19 +0800 Subject: netfilter: Move mutex_is_held under PROVE_LOCKING The rhashtable function mutex_is_held is only used when PROVE_LOCKING is enabled. This patch modifies netfilter so that we can rhashtable.h itself can later make mutex_is_held optional depending on PROVE_LOCKING. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/netfilter/nft_hash.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 8892b7b6184a..b86305c86048 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,10 +153,12 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) return sizeof(struct rhashtable); } +#ifdef CONFIG_PROVE_LOCKING static int lockdep_nfnl_lock_is_held(void) { return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); } +#endif static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, @@ -171,7 +173,9 @@ static int nft_hash_init(const struct nft_set *set, .hashfn = jhash, .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, +#ifdef CONFIG_PROVE_LOCKING .mutex_is_held = lockdep_nfnl_lock_is_held, +#endif }; return rhashtable_init(priv, ¶ms); -- cgit v1.2.3 From 7b4ce2353467fdab6e003be7a3129fb09b09deac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 13 Nov 2014 18:11:22 +0800 Subject: rhashtable: Add parent argument to mutex_is_held Currently mutex_is_held can only test locks in the that are global since it takes no arguments. This prevents rhashtable from being used in places where locks are lock, e.g., per-namespace locks. This patch adds a parent field to mutex_is_held and rhashtable_params so that local locks can be used (and tested). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 3 ++- lib/rhashtable.c | 4 ++-- net/netfilter/nft_hash.c | 2 +- net/netlink/af_netlink.c | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 96ce8ceff554..473e26bdb91d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -66,7 +66,8 @@ struct rhashtable_params { bool (*shrink_decision)(const struct rhashtable *ht, size_t new_size); #ifdef CONFIG_PROVE_LOCKING - int (*mutex_is_held)(void); + int (*mutex_is_held)(void *parent); + void *parent; #endif }; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c7654b6f5f64..4b4b53bfa08b 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -32,7 +32,7 @@ #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(const struct rhashtable *ht) { - return ht->p.mutex_is_held(); + return ht->p.mutex_is_held(ht->p.parent); } EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); #endif @@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(rhashtable_destroy); #define TEST_NEXPANDS 4 #ifdef CONFIG_PROVE_LOCKING -static int test_mutex_is_held(void) +static int test_mutex_is_held(void *parent) { return 1; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index b86305c86048..3f75aaaf9d06 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -154,7 +154,7 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) } #ifdef CONFIG_PROVE_LOCKING -static int lockdep_nfnl_lock_is_held(void) +static int lockdep_nfnl_lock_is_held(void *parent) { return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 53b8ea793191..9e0628cfdf67 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -115,7 +115,7 @@ DEFINE_MUTEX(nl_sk_hash_lock); EXPORT_SYMBOL_GPL(nl_sk_hash_lock); #ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void) +static int lockdep_nl_sk_hash_is_held(void *parent) { if (debug_locks) return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); -- cgit v1.2.3 From 6eba82248ef47fd478f940a418429e3ec95cb3db Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 13 Nov 2014 13:45:46 +0100 Subject: rhashtable: Drop gfp_flags arg in insert/remove functions Reallocation is only required for shrinking and expanding and both rely on a mutex for synchronization and callers of rhashtable_init() are in non atomic context. Therefore, no reason to continue passing allocation hints through the API. Instead, use GFP_KERNEL and add __GFP_NOWARN | __GFP_NORETRY to allow for silent fall back to vzalloc() without the OOM killer jumping in as pointed out by Eric Dumazet and Eric W. Biederman. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 10 +++++----- lib/rhashtable.c | 41 +++++++++++++++++------------------------ net/netfilter/nft_hash.c | 4 ++-- net/netlink/af_netlink.c | 4 ++-- 4 files changed, 26 insertions(+), 33 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 473e26bdb91d..b93fd89b2e5e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -99,16 +99,16 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev, gfp_t flags); + struct rhash_head __rcu **pprev); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); -int rhashtable_expand(struct rhashtable *ht, gfp_t flags); -int rhashtable_shrink(struct rhashtable *ht, gfp_t flags); +int rhashtable_expand(struct rhashtable *ht); +int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(const struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4b4b53bfa08b..25e4c213b08a 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -107,13 +107,13 @@ static u32 head_hashfn(const struct rhashtable *ht, return obj_hashfn(ht, rht_obj(ht, he), hsize); } -static struct bucket_table *bucket_table_alloc(size_t nbuckets, gfp_t flags) +static struct bucket_table *bucket_table_alloc(size_t nbuckets) { struct bucket_table *tbl; size_t size; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, flags); + tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); if (tbl == NULL) tbl = vzalloc(size); @@ -200,7 +200,6 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, /** * rhashtable_expand - Expand hash table while allowing concurrent lookups * @ht: the hash table to expand - * @flags: allocation flags * * A secondary bucket array is allocated and the hash entries are migrated * while keeping them on both lists until the end of the RCU grace period. @@ -211,7 +210,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. */ -int rhashtable_expand(struct rhashtable *ht, gfp_t flags) +int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_head *he; @@ -223,7 +222,7 @@ int rhashtable_expand(struct rhashtable *ht, gfp_t flags) if (ht->p.max_shift && ht->shift >= ht->p.max_shift) return 0; - new_tbl = bucket_table_alloc(old_tbl->size * 2, flags); + new_tbl = bucket_table_alloc(old_tbl->size * 2); if (new_tbl == NULL) return -ENOMEM; @@ -281,7 +280,6 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups * @ht: the hash table to shrink - * @flags: allocation flags * * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. @@ -289,7 +287,7 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. */ -int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) +int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); struct rhash_head __rcu **pprev; @@ -300,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) if (ht->shift <= ht->p.min_shift) return 0; - ntbl = bucket_table_alloc(tbl->size / 2, flags); + ntbl = bucket_table_alloc(tbl->size / 2); if (ntbl == NULL) return -ENOMEM; @@ -341,7 +339,6 @@ EXPORT_SYMBOL_GPL(rhashtable_shrink); * rhashtable_insert - insert object into hash hash table * @ht: hash table * @obj: pointer to hash head inside object - * @flags: allocation flags (table expansion) * * Will automatically grow the table via rhashtable_expand() if the the * grow_decision function specified at rhashtable_init() returns true. @@ -349,8 +346,7 @@ EXPORT_SYMBOL_GPL(rhashtable_shrink); * The caller must ensure that no concurrent table mutations occur. It is * however valid to have concurrent lookups if they are RCU protected. */ -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, - gfp_t flags) +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) { struct bucket_table *tbl = rht_dereference(ht->tbl, ht); u32 hash; @@ -363,7 +359,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, ht->nelems++; if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) - rhashtable_expand(ht, flags); + rhashtable_expand(ht); } EXPORT_SYMBOL_GPL(rhashtable_insert); @@ -372,14 +368,13 @@ EXPORT_SYMBOL_GPL(rhashtable_insert); * @ht: hash table * @obj: pointer to hash head inside object * @pprev: pointer to previous element - * @flags: allocation flags (table expansion) * * Identical to rhashtable_remove() but caller is alreayd aware of the element * in front of the element to be deleted. This is in particular useful for * deletion when combined with walking or lookup. */ void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev, gfp_t flags) + struct rhash_head __rcu **pprev) { struct bucket_table *tbl = rht_dereference(ht->tbl, ht); @@ -390,7 +385,7 @@ void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht, flags); + rhashtable_shrink(ht); } EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); @@ -398,7 +393,6 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); * rhashtable_remove - remove object from hash table * @ht: hash table * @obj: pointer to hash head inside object - * @flags: allocation flags (table expansion) * * Since the hash chain is single linked, the removal operation needs to * walk the bucket chain upon removal. The removal operation is thus @@ -410,8 +404,7 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); * The caller must ensure that no concurrent table mutations occur. It is * however valid to have concurrent lookups if they are RCU protected. */ -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj, - gfp_t flags) +bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) { struct bucket_table *tbl = rht_dereference(ht->tbl, ht); struct rhash_head __rcu **pprev; @@ -429,7 +422,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj, continue; } - rhashtable_remove_pprev(ht, he, pprev, flags); + rhashtable_remove_pprev(ht, he, pprev); return true; } @@ -576,7 +569,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (params->nelem_hint) size = rounded_hashtable_size(params); - tbl = bucket_table_alloc(size, GFP_KERNEL); + tbl = bucket_table_alloc(size); if (tbl == NULL) return -ENOMEM; @@ -713,7 +706,7 @@ static int __init test_rhashtable(struct rhashtable *ht) obj->ptr = TEST_PTR; obj->value = i * 2; - rhashtable_insert(ht, &obj->node, GFP_KERNEL); + rhashtable_insert(ht, &obj->node); } rcu_read_lock(); @@ -724,7 +717,7 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_NEXPANDS; i++) { pr_info(" Table expansion iteration %u...\n", i); - rhashtable_expand(ht, GFP_KERNEL); + rhashtable_expand(ht); rcu_read_lock(); pr_info(" Verifying lookups...\n"); @@ -734,7 +727,7 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_NEXPANDS; i++) { pr_info(" Table shrinkage iteration %u...\n", i); - rhashtable_shrink(ht, GFP_KERNEL); + rhashtable_shrink(ht); rcu_read_lock(); pr_info(" Verifying lookups...\n"); @@ -749,7 +742,7 @@ static int __init test_rhashtable(struct rhashtable *ht) obj = rhashtable_lookup(ht, &key); BUG_ON(!obj); - rhashtable_remove(ht, &obj->node, GFP_KERNEL); + rhashtable_remove(ht, &obj->node); kfree(obj); } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 3f75aaaf9d06..1e316ce4cb5d 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -65,7 +65,7 @@ static int nft_hash_insert(const struct nft_set *set, if (set->flags & NFT_SET_MAP) nft_data_copy(he->data, &elem->data); - rhashtable_insert(priv, &he->node, GFP_KERNEL); + rhashtable_insert(priv, &he->node); return 0; } @@ -88,7 +88,7 @@ static void nft_hash_remove(const struct nft_set *set, pprev = elem->cookie; he = rht_dereference((*pprev), priv); - rhashtable_remove_pprev(priv, he, pprev, GFP_KERNEL); + rhashtable_remove_pprev(priv, he, pprev); synchronize_rcu(); kfree(he); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9e0628cfdf67..a491c1a4861f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1092,7 +1092,7 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) nlk_sk(sk)->portid = portid; sock_hold(sk); - rhashtable_insert(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL); + rhashtable_insert(&table->hash, &nlk_sk(sk)->node); err = 0; err: mutex_unlock(&nl_sk_hash_lock); @@ -1105,7 +1105,7 @@ static void netlink_remove(struct sock *sk) mutex_lock(&nl_sk_hash_lock); table = &nl_table[sk->sk_protocol]; - if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node, GFP_KERNEL)) { + if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) { WARN_ON(atomic_read(&sk->sk_refcnt) == 1); __sock_put(sk); } -- cgit v1.2.3 From e59ea3df3fc290b91c52a250c19091c18b4e6d7e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 14 Nov 2014 13:21:48 +0100 Subject: netfilter: xt_connlimit: honor conntrack zone if available Currently all the conntrack lookups are done using default zone. In case the skb has a ct attached (e.g. template) we should use this zone for lookups instead. This makes connlimit work with connections assigned to other zones. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connlimit.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index fbc66bb250d5..29ba6218a820 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -134,6 +134,7 @@ static bool add_hlist(struct hlist_head *head, static unsigned int check_hlist(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, + u16 zone, bool *addit) { const struct nf_conntrack_tuple_hash *found; @@ -147,8 +148,7 @@ static unsigned int check_hlist(struct net *net, /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE, - &conn->tuple); + found = nf_conntrack_find_get(net, zone, &conn->tuple); if (found == NULL) { hlist_del(&conn->node); kmem_cache_free(connlimit_conn_cachep, conn); @@ -201,7 +201,7 @@ static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u8 family) + u8 family, u16 zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; struct rb_node **rbnode, *parent; @@ -229,7 +229,7 @@ count_tree(struct net *net, struct rb_root *root, } else { /* same source network -> be counted! */ unsigned int count; - count = check_hlist(net, &rbconn->hhead, tuple, &addit); + count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit); tree_nodes_free(root, gc_nodes, gc_count); if (!addit) @@ -245,7 +245,7 @@ count_tree(struct net *net, struct rb_root *root, continue; /* only used for GC on hhead, retval and 'addit' ignored */ - check_hlist(net, &rbconn->hhead, tuple, &addit); + check_hlist(net, &rbconn->hhead, tuple, zone, &addit); if (hlist_empty(&rbconn->hhead)) gc_nodes[gc_count++] = rbconn; } @@ -290,7 +290,7 @@ static int count_them(struct net *net, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, const union nf_inet_addr *mask, - u_int8_t family) + u_int8_t family, u16 zone) { struct rb_root *root; int count; @@ -306,7 +306,7 @@ static int count_them(struct net *net, spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - count = count_tree(net, root, tuple, addr, mask, family); + count = count_tree(net, root, tuple, addr, mask, family, zone); spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); @@ -324,13 +324,16 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int connections; + u16 zone = NF_CT_DEFAULT_ZONE; ct = nf_ct_get(skb, &ctinfo); - if (ct != NULL) + if (ct != NULL) { tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) + zone = nf_ct_zone(ct); + } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + par->family, &tuple)) { goto hotdrop; + } if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -343,7 +346,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, par->family); + &info->mask, par->family, zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; -- cgit v1.2.3 From 2c7b5d5dac0ddaa4e1109fb84dbbe91db3c6c6e5 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Thu, 13 Nov 2014 14:31:25 +0300 Subject: netfilter: nf_conntrack_h323: lookup route from proper net namespace Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 3a3a60b126e0..1d69f5b9748f 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -728,7 +728,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ -static int callforward_do_filter(const union nf_inet_addr *src, +static int callforward_do_filter(struct net *net, + const union nf_inet_addr *src, const union nf_inet_addr *dst, u_int8_t family) { @@ -750,9 +751,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; - if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + if (!afinfo->route(net, (struct dst_entry **)&rt1, flowi4_to_flowi(&fl1), false)) { - if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + if (!afinfo->route(net, (struct dst_entry **)&rt2, flowi4_to_flowi(&fl2), false)) { if (rt_nexthop(rt1, fl1.daddr) == rt_nexthop(rt2, fl2.daddr) && @@ -774,9 +775,9 @@ static int callforward_do_filter(const union nf_inet_addr *src, memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->in6; - if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, + if (!afinfo->route(net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1), false)) { - if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, + if (!afinfo->route(net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { if (ipv6_addr_equal(rt6_nexthop(rt1), rt6_nexthop(rt2)) && @@ -807,6 +808,7 @@ static int expect_callforwarding(struct sk_buff *skb, __be16 port; union nf_inet_addr addr; struct nf_conntrack_expect *exp; + struct net *net = nf_ct_net(ct); typeof(nat_callforwarding_hook) nat_callforwarding; /* Read alternativeAddress */ @@ -816,7 +818,7 @@ static int expect_callforwarding(struct sk_buff *skb, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ if (callforward_filter && - callforward_do_filter(&addr, &ct->tuplehash[!dir].tuple.src.u3, + callforward_do_filter(net, &addr, &ct->tuplehash[!dir].tuple.src.u3, nf_ct_l3num(ct))) { pr_debug("nf_ct_q931: Call Forwarding not tracked\n"); return 0; -- cgit v1.2.3 From 982f405136a44754e884184d24b70d2d4cefcb7a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 18 Nov 2014 20:37:05 +0100 Subject: netfilter: Deletion of unnecessary checks before two function calls The functions free_percpu() and module_put() test whether their argument is NULL and then return immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 3 +-- net/netfilter/ipvs/ip_vs_pe.c | 3 +-- net/netfilter/ipvs/ip_vs_sched.c | 3 +-- net/netfilter/ipvs/ip_vs_sync.c | 3 +-- net/netfilter/nf_tables_api.c | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ac7ba689efe7..b8295a430a56 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -465,8 +465,7 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) static void ip_vs_service_free(struct ip_vs_service *svc) { - if (svc->stats.cpustats) - free_percpu(svc->stats.cpustats); + free_percpu(svc->stats.cpustats); kfree(svc); } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 1a82b29ce8ea..0df17caa8af6 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -37,8 +37,7 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) rcu_read_unlock(); return pe; } - if (pe->module) - module_put(pe->module); + module_put(pe->module); } rcu_read_unlock(); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 4dbcda6258bc..199760c71f39 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -104,8 +104,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) mutex_unlock(&ip_vs_sched_mutex); return sched; } - if (sched->module) - module_put(sched->module); + module_put(sched->module); } mutex_unlock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7162c86fd50d..c47ffd7a0a70 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -820,8 +820,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC); if (!p->pe_data) { - if (p->pe->module) - module_put(p->pe->module); + module_put(p->pe->module); return -ENOMEM; } p->pe_data_len = pe_data_len; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1ffb253c6a77..18a9daef22dd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3674,8 +3674,7 @@ static int nf_tables_abort(struct sk_buff *skb) break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (nft_trans_chain_stats(trans)) - free_percpu(nft_trans_chain_stats(trans)); + free_percpu(nft_trans_chain_stats(trans)); nft_trans_destroy(trans); } else { -- cgit v1.2.3 From beacd3e8ef237e077c8707395440813feef16d3f Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Thu, 6 Nov 2014 12:32:30 +0100 Subject: netfilter: nfnetlink_log: Make use of pr_fmt where applicable Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 51996ece3d2a..405cf5879320 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -12,6 +12,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1063,19 +1066,19 @@ static int __init nfnetlink_log_init(void) netlink_register_notifier(&nfulnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfulnl_subsys); if (status < 0) { - pr_err("log: failed to create netlink socket\n"); + pr_err("failed to create netlink socket\n"); goto cleanup_netlink_notifier; } status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); if (status < 0) { - pr_err("log: failed to register logger\n"); + pr_err("failed to register logger\n"); goto cleanup_subsys; } status = register_pernet_subsys(&nfnl_log_net_ops); if (status < 0) { - pr_err("log: failed to register pernet ops\n"); + pr_err("failed to register pernet ops\n"); goto cleanup_logger; } return status; -- cgit v1.2.3 From abc86d0f99242b7f142b7cb8f90e30081dd3c256 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Nov 2014 14:06:22 +0100 Subject: netfilter: xt_recent: relax ip_pkt_list_tot restrictions The maximum value for the hitcount parameter is given by "ip_pkt_list_tot" parameter (default: 20). Exceeding this value on the command line will cause the rule to be rejected. The parameter is also readonly, i.e. it cannot be changed without module unload or reboot. Store size per table, then base nstamps[] size on the hitcount instead. The module parameter is retained for backwards compatibility. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_recent.c | 64 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 17 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index a9faae89f955..30dbe34915ae 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -43,25 +43,29 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); MODULE_ALIAS("ip6t_recent"); -static unsigned int ip_list_tot = 100; -static unsigned int ip_pkt_list_tot = 20; -static unsigned int ip_list_hash_size = 0; -static unsigned int ip_list_perms = 0644; -static unsigned int ip_list_uid = 0; -static unsigned int ip_list_gid = 0; +static unsigned int ip_list_tot __read_mostly = 100; +static unsigned int ip_list_hash_size __read_mostly; +static unsigned int ip_list_perms __read_mostly = 0644; +static unsigned int ip_list_uid __read_mostly; +static unsigned int ip_list_gid __read_mostly; module_param(ip_list_tot, uint, 0400); -module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR); module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files"); +/* retained for backwards compatibility */ +static unsigned int ip_pkt_list_tot __read_mostly; +module_param(ip_pkt_list_tot, uint, 0400); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); + +#define XT_RECENT_MAX_NSTAMPS 256 + struct recent_entry { struct list_head list; struct list_head lru_list; @@ -79,6 +83,7 @@ struct recent_table { union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; + u8 nstamps_max_mask; struct list_head lru_list; struct list_head iphash[0]; }; @@ -90,7 +95,8 @@ struct recent_net { #endif }; -static int recent_net_id; +static int recent_net_id __read_mostly; + static inline struct recent_net *recent_pernet(struct net *net) { return net_generic(net, recent_net_id); @@ -171,12 +177,15 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; + unsigned int nstamps_max = t->nstamps_max_mask; if (t->entries >= ip_list_tot) { e = list_entry(t->lru_list.next, struct recent_entry, lru_list); recent_entry_remove(t, e); } - e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * ip_pkt_list_tot, + + nstamps_max += 1; + e = kmalloc(sizeof(*e) + sizeof(e->stamps[0]) * nstamps_max, GFP_ATOMIC); if (e == NULL) return NULL; @@ -197,7 +206,7 @@ recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { - e->index %= ip_pkt_list_tot; + e->index &= t->nstamps_max_mask; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; @@ -326,6 +335,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, kuid_t uid; kgid_t gid; #endif + unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; size_t sz; @@ -349,19 +359,33 @@ static int recent_mt_check(const struct xt_mtchk_param *par, return -EINVAL; if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; - if (info->hit_count > ip_pkt_list_tot) { - pr_info("hitcount (%u) is larger than " - "packets to be remembered (%u)\n", - info->hit_count, ip_pkt_list_tot); + if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { + pr_info("hitcount (%u) is larger than allowed maximum (%u)\n", + info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } if (info->name[0] == '\0' || strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) return -EINVAL; + if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) + nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; + else if (info->hit_count) + nstamp_mask = roundup_pow_of_two(info->hit_count) - 1; + else + nstamp_mask = 32 - 1; + mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { + if (info->hit_count > t->nstamps_max_mask) { + pr_info("hitcount (%u) is larger than packets to be remembered (%u) for table %s\n", + info->hit_count, t->nstamps_max_mask + 1, + info->name); + ret = -EINVAL; + goto out; + } + t->refcnt++; ret = 0; goto out; @@ -377,6 +401,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, goto out; } t->refcnt = 1; + t->nstamps_max_mask = nstamp_mask; memcpy(&t->mask, &info->mask, sizeof(t->mask)); strcpy(t->name, info->name); @@ -497,9 +522,12 @@ static void recent_seq_stop(struct seq_file *s, void *v) static int recent_seq_show(struct seq_file *seq, void *v) { const struct recent_entry *e = v; + struct recent_iter_state *st = seq->private; + const struct recent_table *t = st->table; unsigned int i; - i = (e->index - 1) % ip_pkt_list_tot; + i = (e->index - 1) & t->nstamps_max_mask; + if (e->family == NFPROTO_IPV4) seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.ip, e->ttl, e->stamps[i], e->index); @@ -717,7 +745,9 @@ static int __init recent_mt_init(void) { int err; - if (!ip_list_tot || !ip_pkt_list_tot || ip_pkt_list_tot > 255) + BUILD_BUG_ON_NOT_POWER_OF_2(XT_RECENT_MAX_NSTAMPS); + + if (!ip_list_tot || ip_pkt_list_tot >= XT_RECENT_MAX_NSTAMPS) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); -- cgit v1.2.3 From c41884ce0562841b98fa9790c9209c9073121a15 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Nov 2014 15:25:57 +0100 Subject: netfilter: conntrack: avoid zeroing timer add a __nfct_init_offset annotation member to struct nf_conn to make it clear which members are covered by the memset when the conntrack is allocated. This avoids zeroing timer_list and ct_net; both are already inited explicitly. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 15 +++++++++------ net/netfilter/nf_conntrack_core.c | 11 ++++------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'net/netfilter') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index c8a7db605e03..f0daed2b54d1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -92,12 +92,18 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* If we were expected by an expectation, this will be it */ - struct nf_conn *master; - /* Timer function; drops refcnt when it goes off. */ struct timer_list timeout; +#ifdef CONFIG_NET_NS + struct net *ct_net; +#endif + /* all members below initialized via memset */ + u8 __nfct_init_offset[0]; + + /* If we were expected by an expectation, this will be it */ + struct nf_conn *master; + #if defined(CONFIG_NF_CONNTRACK_MARK) u_int32_t mark; #endif @@ -108,9 +114,6 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; -#ifdef CONFIG_NET_NS - struct net *ct_net; -#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2c699757bccf..9ef88c8dd68a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -826,22 +826,19 @@ __nf_conntrack_alloc(struct net *net, u16 zone, atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } - /* - * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next - * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. - */ - memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, - offsetof(struct nf_conn, proto) - - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); spin_lock_init(&ct->lock); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; + ct->status = 0; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); write_pnet(&ct->ct_net, net); + memset(&ct->__nfct_init_offset[0], 0, + offsetof(struct nf_conn, proto) - + offsetof(struct nf_conn, __nfct_init_offset[0])); #ifdef CONFIG_NF_CONNTRACK_ZONES if (zone) { struct nf_conntrack_zone *nf_ct_zone; -- cgit v1.2.3 From b59eaf9e2871735ea7cc7e3dbf8bf83bddd786b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Nov 2014 12:46:50 +0100 Subject: netfilter: combine IPv4 and IPv6 nf_nat_redirect code in one module This resolves linking problems with CONFIG_IPV6=n: net/built-in.o: In function `redirect_tg6': xt_REDIRECT.c:(.text+0x6d021): undefined reference to `nf_nat_redirect_ipv6' Reported-by: Andreas Ruprecht Reported-by: Or Gerlitz Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_nat_redirect.h | 9 -- include/net/netfilter/ipv6/nf_nat_redirect.h | 8 -- include/net/netfilter/nf_nat_redirect.h | 12 +++ net/ipv4/netfilter/Kconfig | 8 +- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/nf_nat_redirect_ipv4.c | 82 ----------------- net/ipv4/netfilter/nft_redir_ipv4.c | 2 +- net/ipv6/netfilter/Kconfig | 8 +- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/nf_nat_redirect_ipv6.c | 75 ---------------- net/ipv6/netfilter/nft_redir_ipv6.c | 2 +- net/netfilter/Kconfig | 10 ++- net/netfilter/Makefile | 1 + net/netfilter/nf_nat_redirect.c | 127 +++++++++++++++++++++++++++ net/netfilter/xt_REDIRECT.c | 3 +- 15 files changed, 153 insertions(+), 196 deletions(-) delete mode 100644 include/net/netfilter/ipv4/nf_nat_redirect.h delete mode 100644 include/net/netfilter/ipv6/nf_nat_redirect.h create mode 100644 include/net/netfilter/nf_nat_redirect.h delete mode 100644 net/ipv4/netfilter/nf_nat_redirect_ipv4.c delete mode 100644 net/ipv6/netfilter/nf_nat_redirect_ipv6.c create mode 100644 net/netfilter/nf_nat_redirect.c (limited to 'net/netfilter') diff --git a/include/net/netfilter/ipv4/nf_nat_redirect.h b/include/net/netfilter/ipv4/nf_nat_redirect.h deleted file mode 100644 index 19e1df3a0a4d..000000000000 --- a/include/net/netfilter/ipv4/nf_nat_redirect.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _NF_NAT_REDIRECT_IPV4_H_ -#define _NF_NAT_REDIRECT_IPV4_H_ - -unsigned int -nf_nat_redirect_ipv4(struct sk_buff *skb, - const struct nf_nat_ipv4_multi_range_compat *mr, - unsigned int hooknum); - -#endif /* _NF_NAT_REDIRECT_IPV4_H_ */ diff --git a/include/net/netfilter/ipv6/nf_nat_redirect.h b/include/net/netfilter/ipv6/nf_nat_redirect.h deleted file mode 100644 index 1ebdffc461cc..000000000000 --- a/include/net/netfilter/ipv6/nf_nat_redirect.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _NF_NAT_REDIRECT_IPV6_H_ -#define _NF_NAT_REDIRECT_IPV6_H_ - -unsigned int -nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, - unsigned int hooknum); - -#endif /* _NF_NAT_REDIRECT_IPV6_H_ */ diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h new file mode 100644 index 000000000000..73b729543309 --- /dev/null +++ b/include/net/netfilter/nf_nat_redirect.h @@ -0,0 +1,12 @@ +#ifndef _NF_NAT_REDIRECT_H_ +#define _NF_NAT_REDIRECT_H_ + +unsigned int +nf_nat_redirect_ipv4(struct sk_buff *skb, + const struct nf_nat_ipv4_multi_range_compat *mr, + unsigned int hooknum); +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum); + +#endif /* _NF_NAT_REDIRECT_H_ */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 8358b2da1549..59f883d9cadf 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -104,12 +104,6 @@ config NF_NAT_MASQUERADE_IPV4 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection). -config NF_NAT_REDIRECT_IPV4 - tristate "IPv4 redirect support" - help - This is the kernel functionality to provide NAT in the redirect - flavour (redirect packets to local machine). - config NFT_MASQ_IPV4 tristate "IPv4 masquerading support for nf_tables" depends on NF_TABLES_IPV4 @@ -123,7 +117,7 @@ config NFT_REDIR_IPV4 tristate "IPv4 redirect support for nf_tables" depends on NF_TABLES_IPV4 depends on NFT_REDIR - select NF_NAT_REDIRECT_IPV4 + select NF_NAT_REDIRECT help This is the expression that provides IPv4 redirect support for nf_tables. diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 902bcd1597bb..7fe6c703528f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,7 +31,6 @@ obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o -obj-$(CONFIG_NF_NAT_REDIRECT_IPV4) += nf_nat_redirect_ipv4.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o diff --git a/net/ipv4/netfilter/nf_nat_redirect_ipv4.c b/net/ipv4/netfilter/nf_nat_redirect_ipv4.c deleted file mode 100644 index a220552fc532..000000000000 --- a/net/ipv4/netfilter/nf_nat_redirect_ipv4.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -unsigned int -nf_nat_redirect_ipv4(struct sk_buff *skb, - const struct nf_nat_ipv4_multi_range_compat *mr, - unsigned int hooknum) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - struct nf_nat_range newrange; - - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || - hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (hooknum == NF_INET_LOCAL_OUT) { - newdst = htonl(0x7F000001); - } else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); - if (indev != NULL) { - ifa = indev->ifa_list; - newdst = ifa->ifa_local; - } - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} -EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 643c5967aa27..ff2d23d8c87a 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include static void nft_redir_ipv4_eval(const struct nft_expr *expr, diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 0dbe5c7953e5..a069822936e6 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -82,12 +82,6 @@ config NF_NAT_MASQUERADE_IPV6 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. -config NF_NAT_REDIRECT_IPV6 - tristate "IPv6 redirect support" - help - This is the kernel functionality to provide NAT in the redirect - flavour (redirect packet to local machine) for IPv6. - config NFT_MASQ_IPV6 tristate "IPv6 masquerade support for nf_tables" depends on NF_TABLES_IPV6 @@ -101,7 +95,7 @@ config NFT_REDIR_IPV6 tristate "IPv6 redirect support for nf_tables" depends on NF_TABLES_IPV6 depends on NFT_REDIR - select NF_NAT_REDIRECT_IPV6 + select NF_NAT_REDIRECT help This is the expression that provides IPv4 redirect support for nf_tables. diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d2ac9f5f212c..c36e0a5490de 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o -obj-$(CONFIG_NF_NAT_REDIRECT_IPV6) += nf_nat_redirect_ipv6.o # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c deleted file mode 100644 index ea1308aeb048..000000000000 --- a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; - -unsigned int -nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, - unsigned int hooknum) -{ - struct nf_nat_range newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (hooknum == NF_INET_LOCAL_OUT) { - newdst = loopback_addr; - } else { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - bool addr = false; - - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - if (idev != NULL) { - list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; - addr = true; - break; - } - } - rcu_read_unlock(); - - if (!addr) - return NF_DROP; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} -EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 83420eeaad1c..2433a6bfb191 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include static void nft_redir_ipv6_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 57f15a9aa481..b02660fa9eb0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -411,6 +411,13 @@ config NF_NAT_TFTP depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_TFTP +config NF_NAT_REDIRECT + tristate "IPv4/IPv6 redirect support" + depends on NF_NAT + help + This is the kernel functionality to redirect packets to local + machine through NAT. + config NETFILTER_SYNPROXY tristate @@ -844,8 +851,7 @@ config NETFILTER_XT_TARGET_RATEEST config NETFILTER_XT_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT - select NF_NAT_REDIRECT_IPV4 if NF_NAT_IPV4 - select NF_NAT_REDIRECT_IPV6 if NF_NAT_IPV6 + select NF_NAT_REDIRECT ---help--- REDIRECT is a special case of NAT: all incoming connections are mapped onto the incoming interface's address, causing the packets to diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index f3eb4680f2ec..89f73a9e9874 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -51,6 +51,7 @@ nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \ obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o obj-$(CONFIG_NF_NAT) += nf_nat.o +obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o # NAT protocols (nf_nat) obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c new file mode 100644 index 000000000000..97b75f9bfbcd --- /dev/null +++ b/net/netfilter/nf_nat_redirect.c @@ -0,0 +1,127 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int +nf_nat_redirect_ipv4(struct sk_buff *skb, + const struct nf_nat_ipv4_multi_range_compat *mr, + unsigned int hooknum) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 newdst; + struct nf_nat_range newrange; + + NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || + hooknum == NF_INET_LOCAL_OUT); + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (hooknum == NF_INET_LOCAL_OUT) { + newdst = htonl(0x7F000001); + } else { + struct in_device *indev; + struct in_ifaddr *ifa; + + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get_rcu(skb->dev); + if (indev != NULL) { + ifa = indev->ifa_list; + newdst = ifa->ifa_local; + } + rcu_read_unlock(); + + if (!newdst) + return NF_DROP; + } + + /* Transfer from original range. */ + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv4); + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum) +{ + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (hooknum == NF_INET_LOCAL_OUT) { + newdst = loopback_addr; + } else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c index b6ec67efd900..03f0b370e178 100644 --- a/net/netfilter/xt_REDIRECT.c +++ b/net/netfilter/xt_REDIRECT.c @@ -26,8 +26,7 @@ #include #include #include -#include -#include +#include static unsigned int redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) -- cgit v1.2.3 From 86ac79c7bea1543423f96f388b7ac2e3acca66b3 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:52 +0100 Subject: netfilter: ipset: Support updating extensions when the set is full When the set was full (hash type and maxelem reached), it was not possible to update the extension part of already existing elements. The patch removes this limitation. Fixes: https://bugzilla.netfilter.org/show_bug.cgi?id=880 Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 40 +++++++++++++++-------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index fee7c64e4dd1..a12ee045258b 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -633,29 +633,6 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, bool flag_exist = flags & IPSET_FLAG_EXIST; u32 key, multi = 0; - if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set)) { - rcu_read_lock_bh(); - t = rcu_dereference_bh(h->table); - key = HKEY(value, h->initval, t->htable_bits); - n = hbucket(t,key); - if (n->pos) { - /* Choosing the first entry in the array to replace */ - j = 0; - goto reuse_slot; - } - rcu_read_unlock_bh(); - } - if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) - /* FIXME: when set is full, we slow down here */ - mtype_expire(set, h, NLEN(set->family), set->dsize); - - if (h->elements >= h->maxelem) { - if (net_ratelimit()) - pr_warn("Set %s is full, maxelem %u reached\n", - set->name, h->maxelem); - return -IPSET_ERR_HASH_FULL; - } - rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); @@ -680,6 +657,23 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, j != AHASH_MAX(h) + 1) j = i; } + if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set) && n->pos) { + /* Choosing the first entry in the array to replace */ + j = 0; + goto reuse_slot; + } + if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem) + /* FIXME: when set is full, we slow down here */ + mtype_expire(set, h, NLEN(set->family), set->dsize); + + if (h->elements >= h->maxelem) { + if (net_ratelimit()) + pr_warn("Set %s is full, maxelem %u reached\n", + set->name, h->maxelem); + ret = -IPSET_ERR_HASH_FULL; + goto out; + } + reuse_slot: if (j != AHASH_MAX(h) + 1) { /* Fill out reused slot */ -- cgit v1.2.3 From a51b9199b1e092da5ee4a89852e84b4c52ae6044 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:53 +0100 Subject: netfilter: ipset: Alignment problem between 64bit kernel 32bit userspace Sven-Haegar Koch reported the issue: sims:~# iptables -A OUTPUT -m set --match-set testset src -j ACCEPT iptables: Invalid argument. Run `dmesg' for more information. In syslog: x_tables: ip_tables: set.3 match: invalid size 48 (kernel) != (user) 32 which was introduced by the counter extension in ipset. The patch fixes the alignment issue with introducing a new set match revision with the fixed underlying 'struct ip_set_counter_match' structure. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/ipset/ip_set.h | 8 +++- include/uapi/linux/netfilter/xt_set.h | 13 ++++- net/netfilter/xt_set.c | 73 +++++++++++++++++++++++++++-- 3 files changed, 88 insertions(+), 6 deletions(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index ca03119111a2..5ab4e60894cf 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -256,11 +256,17 @@ enum { IPSET_COUNTER_GT, }; -struct ip_set_counter_match { +/* Backward compatibility for set match v3 */ +struct ip_set_counter_match0 { __u8 op; __u64 value; }; +struct ip_set_counter_match { + __aligned_u64 value; + __u8 op; +}; + /* Interface to iptables/ip6tables */ #define SO_IP_SET 83 diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h index d6a1df1f2947..d4e02348384c 100644 --- a/include/uapi/linux/netfilter/xt_set.h +++ b/include/uapi/linux/netfilter/xt_set.h @@ -66,8 +66,8 @@ struct xt_set_info_target_v2 { struct xt_set_info_match_v3 { struct xt_set_info match_set; - struct ip_set_counter_match packets; - struct ip_set_counter_match bytes; + struct ip_set_counter_match0 packets; + struct ip_set_counter_match0 bytes; __u32 flags; }; @@ -81,4 +81,13 @@ struct xt_set_info_target_v3 { __u32 timeout; }; +/* Revision 4 match */ + +struct xt_set_info_match_v4 { + struct xt_set_info match_set; + struct ip_set_counter_match packets; + struct ip_set_counter_match bytes; + __u32 flags; +}; + #endif /*_XT_SET_H*/ diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 5732cd64acc0..0d47afea9682 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -157,7 +157,7 @@ set_match_v1_destroy(const struct xt_mtdtor_param *par) /* Revision 3 match */ static bool -match_counter(u64 counter, const struct ip_set_counter_match *info) +match_counter0(u64 counter, const struct ip_set_counter_match0 *info) { switch (info->op) { case IPSET_COUNTER_NONE: @@ -192,14 +192,60 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) return ret; - if (!match_counter(opt.ext.packets, &info->packets)) + if (!match_counter0(opt.ext.packets, &info->packets)) return 0; - return match_counter(opt.ext.bytes, &info->bytes); + return match_counter0(opt.ext.bytes, &info->bytes); } #define set_match_v3_checkentry set_match_v1_checkentry #define set_match_v3_destroy set_match_v1_destroy +/* Revision 4 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ + switch (info->op) { + case IPSET_COUNTER_NONE: + return true; + case IPSET_COUNTER_EQ: + return counter == info->value; + case IPSET_COUNTER_NE: + return counter != info->value; + case IPSET_COUNTER_LT: + return counter < info->value; + case IPSET_COUNTER_GT: + return counter > info->value; + } + return false; +} + +static bool +set_match_v4(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_set_info_match_v4 *info = par->matchinfo; + ADT_OPT(opt, par->family, info->match_set.dim, + info->match_set.flags, info->flags, UINT_MAX); + int ret; + + if (info->packets.op != IPSET_COUNTER_NONE || + info->bytes.op != IPSET_COUNTER_NONE) + opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + + ret = match_set(info->match_set.index, skb, par, &opt, + info->match_set.flags & IPSET_INV_MATCH); + + if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) + return ret; + + if (!match_counter(opt.ext.packets, &info->packets)) + return 0; + return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v4_checkentry set_match_v1_checkentry +#define set_match_v4_destroy set_match_v1_destroy + /* Revision 0 interface: backward compatible with netfilter/iptables */ static unsigned int @@ -573,6 +619,27 @@ static struct xt_match set_matches[] __read_mostly = { .destroy = set_match_v3_destroy, .me = THIS_MODULE }, + /* new revision for counters support: update, match */ + { + .name = "set", + .family = NFPROTO_IPV4, + .revision = 4, + .match = set_match_v4, + .matchsize = sizeof(struct xt_set_info_match_v4), + .checkentry = set_match_v4_checkentry, + .destroy = set_match_v4_destroy, + .me = THIS_MODULE + }, + { + .name = "set", + .family = NFPROTO_IPV6, + .revision = 4, + .match = set_match_v4, + .matchsize = sizeof(struct xt_set_info_match_v4), + .checkentry = set_match_v4_checkentry, + .destroy = set_match_v4_destroy, + .me = THIS_MODULE + }, }; static struct xt_target set_targets[] __read_mostly = { -- cgit v1.2.3 From 59de79cf5706b2ad19598fac6c071a5490cb49d8 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:54 +0100 Subject: netfilter: ipset: Indicate when /0 networks are supported Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- net/netfilter/ipset/ip_set_hash_netiface.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index a12ee045258b..9428fa5ae7c2 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -156,7 +156,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) -#ifdef IP_SET_HASH_WITH_MULTI +#ifdef IP_SET_HASH_WITH_NET0 #define NLEN(family) (SET_HOST_MASK(family) + 1) #else #define NLEN(family) SET_HOST_MASK(family) diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 35dd35873442..758b002130d9 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -115,6 +115,7 @@ iface_add(struct rb_root *root, const char **iface) #define IP_SET_HASH_WITH_NETS #define IP_SET_HASH_WITH_RBTREE #define IP_SET_HASH_WITH_MULTI +#define IP_SET_HASH_WITH_NET0 #define STREQ(a, b) (strcmp(a, b) == 0) -- cgit v1.2.3 From 25a76f3463e0424fdf85773afb4be4972b1c0a29 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:55 +0100 Subject: netfilter: ipset: Simplify cidr handling for hash:*net* types Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 56 +++++++++++++++++------------------ 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 9428fa5ae7c2..8ef9135d8bb5 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -147,11 +147,17 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) #else #define __CIDR(cidr, i) (cidr) #endif + +/* cidr + 1 is stored in net_prefixes to support /0 */ +#define SCIDR(cidr, i) (__CIDR(cidr, i) + 1) + #ifdef IP_SET_HASH_WITH_NETS_PACKED -/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ -#define CIDR(cidr, i) (__CIDR(cidr, i) + 1) +/* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ +#define GCIDR(cidr, i) (__CIDR(cidr, i) + 1) +#define NCIDR(cidr) (cidr) #else -#define CIDR(cidr, i) (__CIDR(cidr, i)) +#define GCIDR(cidr, i) (__CIDR(cidr, i)) +#define NCIDR(cidr) (cidr - 1) #endif #define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128) @@ -292,24 +298,22 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) int i, j; /* Add in increasing prefix order, so larger cidr first */ - for (i = 0, j = -1; i < nets_length && h->nets[i].nets[n]; i++) { + for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) { if (j != -1) continue; else if (h->nets[i].cidr[n] < cidr) j = i; else if (h->nets[i].cidr[n] == cidr) { - h->nets[i].nets[n]++; + h->nets[cidr - 1].nets[n]++; return; } } if (j != -1) { - for (; i > j; i--) { + for (; i > j; i--) h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; - h->nets[i].nets[n] = h->nets[i - 1].nets[n]; - } } h->nets[i].cidr[n] = cidr; - h->nets[i].nets[n] = 1; + h->nets[cidr - 1].nets[n] = 1; } static void @@ -320,16 +324,12 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n) for (i = 0; i < nets_length; i++) { if (h->nets[i].cidr[n] != cidr) continue; - if (h->nets[i].nets[n] > 1 || i == net_end || - h->nets[i + 1].nets[n] == 0) { - h->nets[i].nets[n]--; + h->nets[cidr -1].nets[n]--; + if (h->nets[cidr -1].nets[n] > 0) return; - } - for (j = i; j < net_end && h->nets[j].nets[n]; j++) { + for (j = i; j < net_end && h->nets[j].cidr[n]; j++) h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; - h->nets[j].nets[n] = h->nets[j + 1].nets[n]; - } - h->nets[j].nets[n] = 0; + h->nets[j].cidr[n] = 0; return; } } @@ -486,7 +486,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize) pr_debug("expired %u/%u\n", i, j); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) - mtype_del_cidr(h, CIDR(data->cidr, k), + mtype_del_cidr(h, SCIDR(data->cidr, k), nets_length, k); #endif ip_set_ext_destroy(set, data); @@ -680,9 +680,9 @@ reuse_slot: data = ahash_data(n, j, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) { - mtype_del_cidr(h, CIDR(data->cidr, i), + mtype_del_cidr(h, SCIDR(data->cidr, i), NLEN(set->family), i); - mtype_add_cidr(h, CIDR(d->cidr, i), + mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family), i); } #endif @@ -699,7 +699,7 @@ reuse_slot: data = ahash_data(n, n->pos++, set->dsize); #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) - mtype_add_cidr(h, CIDR(d->cidr, i), NLEN(set->family), + mtype_add_cidr(h, SCIDR(d->cidr, i), NLEN(set->family), i); #endif h->elements++; @@ -760,7 +760,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, h->elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) - mtype_del_cidr(h, CIDR(d->cidr, j), NLEN(set->family), + mtype_del_cidr(h, SCIDR(d->cidr, j), NLEN(set->family), j); #endif ip_set_ext_destroy(set, data); @@ -821,15 +821,15 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, u8 nets_length = NLEN(set->family); pr_debug("test by nets\n"); - for (; j < nets_length && h->nets[j].nets[0] && !multi; j++) { + for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 mtype_data_reset_elem(d, &orig); - mtype_data_netmask(d, h->nets[j].cidr[0], false); - for (k = 0; k < nets_length && h->nets[k].nets[1] && !multi; + mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0]), false); + for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi; k++) { - mtype_data_netmask(d, h->nets[k].cidr[1], true); + mtype_data_netmask(d, NCIDR(h->nets[k].cidr[1]), true); #else - mtype_data_netmask(d, h->nets[j].cidr[0]); + mtype_data_netmask(d, NCIDR(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); n = hbucket(t, key); @@ -877,7 +877,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* If we test an IP address and not a network address, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) - if (CIDR(d->cidr, i) != SET_HOST_MASK(set->family)) + if (GCIDR(d->cidr, i) != SET_HOST_MASK(set->family)) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); -- cgit v1.2.3 From 77b4311d207c3ed7260da840ba41afa8bd9ca24c Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:56 +0100 Subject: netfilter: ipset: Allocate the proper size of memory when /0 networks are supported Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 8ef9135d8bb5..974ff386db0f 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1101,8 +1101,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, hsize = sizeof(*h); #ifdef IP_SET_HASH_WITH_NETS - hsize += sizeof(struct net_prefixes) * - (set->family == NFPROTO_IPV4 ? 32 : 128); + hsize += sizeof(struct net_prefixes) * NLEN(set->family); #endif h = kzalloc(hsize, GFP_KERNEL); if (!h) -- cgit v1.2.3 From cac3763967362ace7996532ad3933f493a928a1b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sun, 30 Nov 2014 19:56:57 +0100 Subject: netfilter: ipset: Explicitly add padding elements to hash:net, net and hash:net, port, net The elements must be u32 sized for the used hash function. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netnet.c | 2 ++ net/netfilter/ipset/ip_set_hash_netportnet.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index da00284b3571..ea8772afb6e7 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -46,6 +46,7 @@ struct hash_netnet4_elem { __be64 ipcmp; }; u8 nomatch; + u8 padding; union { u8 cidr[2]; u16 ccmp; @@ -271,6 +272,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netnet6_elem { union nf_inet_addr ip[2]; u8 nomatch; + u8 padding; union { u8 cidr[2]; u16 ccmp; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index b8053d675fc3..bfaa94c7baa7 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -53,6 +53,7 @@ struct hash_netportnet4_elem { u8 cidr[2]; u16 ccmp; }; + u16 padding; u8 nomatch:1; u8 proto; }; @@ -324,6 +325,7 @@ struct hash_netportnet6_elem { u8 cidr[2]; u16 ccmp; }; + u16 padding; u8 nomatch:1; u8 proto; }; -- cgit v1.2.3 From dbfc4fb7d578d4f224faa6b60deb40804dfdc1b1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 6 Dec 2014 19:19:42 +0100 Subject: dst: no need to take reference on DST_NOCACHE dsts Since commit f8864972126899 ("ipv4: fix dst race in sk_dst_get()") DST_NOCACHE dst_entries get freed by RCU. So there is no need to get a reference on them when we are in rcu protected sections. Cc: Eric Dumazet Cc: Julian Anastasov Signed-off-by: Hannes Frederic Sowa Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller --- include/linux/skbuff.h | 23 ++--------------------- net/core/dst.c | 24 ------------------------ net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- 3 files changed, 4 insertions(+), 47 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e9281b5b7f59..ef64cec42804 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -717,9 +717,6 @@ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) skb->_skb_refdst = (unsigned long)dst; } -void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, - bool force); - /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer @@ -732,24 +729,8 @@ void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { - __skb_dst_set_noref(skb, dst, false); -} - -/** - * skb_dst_set_noref_force - sets skb dst, without taking reference - * @skb: buffer - * @dst: dst entry - * - * Sets skb dst, assuming a reference was not taken on dst. - * No reference is taken and no dst_release will be called. While for - * cached dsts deferred reclaim is a basic feature, for entries that are - * not cached it is caller's job to guarantee that last dst_release for - * provided dst happens when nobody uses it, eg. after a RCU grace period. - */ -static inline void skb_dst_set_noref_force(struct sk_buff *skb, - struct dst_entry *dst) -{ - __skb_dst_set_noref(skb, dst, true); + WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** diff --git a/net/core/dst.c b/net/core/dst.c index a028409ee438..e956ce6d1378 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -327,30 +327,6 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) } EXPORT_SYMBOL(__dst_destroy_metrics_generic); -/** - * __skb_dst_set_noref - sets skb dst, without a reference - * @skb: buffer - * @dst: dst entry - * @force: if force is set, use noref version even for DST_NOCACHE entries - * - * Sets skb dst, assuming a reference was not taken on dst - * skb_dst_drop() should not dst_release() this dst - */ -void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force) -{ - WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); - /* If dst not in cache, we must take a reference, because - * dst_release() will destroy dst as soon as its refcount becomes zero - */ - if (unlikely((dst->flags & DST_NOCACHE) && !force)) { - dst_hold(dst); - skb_dst_set(skb, dst); - } else { - skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; - } -} -EXPORT_SYMBOL(__skb_dst_set_noref); - /* Dirty hack. We did it in 2.2 (in __dst_free), * we have _very_ good reasons not to repeat * this mistake in 2.3, but we have no choice diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 1f933136155a..3aedbda7658a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -343,7 +343,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, skb_dst_drop(skb); if (noref) { if (!local) - skb_dst_set_noref_force(skb, &rt->dst); + skb_dst_set_noref(skb, &rt->dst); else skb_dst_set(skb, dst_clone(&rt->dst)); } else @@ -487,7 +487,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, skb_dst_drop(skb); if (noref) { if (!local) - skb_dst_set_noref_force(skb, &rt->dst); + skb_dst_set_noref(skb, &rt->dst); else skb_dst_set(skb, dst_clone(&rt->dst)); } else -- cgit v1.2.3