diff options
author | David S. Miller <davem@davemloft.net> | 2016-03-28 15:38:59 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-03-28 15:38:59 -0400 |
commit | 0c84ea17ff22df83d72b20e336ae69d6035c8bbf (patch) | |
tree | 594d47b721ee7d6b8252c39bfcb38ae6bc6b5682 /net | |
parent | 0e3e7999703e7df93a9deee8ba89b604c5fd662d (diff) | |
parent | 29421198c3a860092e27c2ad8499dfe603398817 (diff) | |
download | linux-0c84ea17ff22df83d72b20e336ae69d6035c8bbf.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says:
====================
Netfilter fixes for net
The following patchset contains Netfilter fixes for you net tree,
they are:
1) There was a race condition between parallel save/swap and delete,
which resulted a kernel crash due to the increase ref for save, swap,
wrong ref decrease operations. Reported and fixed by Vishwanath Pai.
2) OVS should call into CT NAT for packets of new expected connections only
when the conntrack state is persisted with the 'commit' option to the
OVS CT action. From Jarno Rajahalme.
3) Resolve kconfig dependencies with new OVS NAT support. From Arnd Bergmann.
4) Early validation of entry->target_offset to make sure it doesn't take us
out from the blob, from Florian Westphal.
5) Again early validation of entry->next_offset to make sure it doesn't take
out from the blob, also from Florian.
6) Check that entry->target_offset is always of of sizeof(struct xt_entry)
for unconditional entries, when checking both from check_underflow()
and when checking for loops in mark_source_chains(), again from
Florian.
7) Fix inconsistent behaviour in nfnetlink_queue when
NFQA_CFG_F_FAIL_OPEN is set and netlink_unicast() fails due to buffer
overrun, we have to reinject the packet as the user expects.
8) Enforce nul-terminated table names from getsockopt GET_ENTRIES
requests.
9) Don't assume skb->sk is set from nft_bridge_reject and synproxy,
this fixes a recent update of the code to namespaceify
ip_default_ttl, patch from Liping Zhang.
This batch comes with four patches to validate x_tables blobs coming
from userspace. CONFIG_USERNS exposes the x_tables interface to
unpriviledged users and to be honest this interface never received the
attention for this move away from the CAP_NET_ADMIN domain. Florian is
working on another round with more patches with more sanity checks, so
expect a bit more Netfilter fixes in this development cycle than usual.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/bridge/netfilter/ebtables.c | 4 | ||||
-rw-r--r-- | net/bridge/netfilter/nft_reject_bridge.c | 20 | ||||
-rw-r--r-- | net/ipv4/netfilter/arp_tables.c | 43 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 48 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_SYNPROXY.c | 54 | ||||
-rw-r--r-- | net/ipv6/netfilter/ip6_tables.c | 48 | ||||
-rw-r--r-- | net/netfilter/ipset/ip_set_bitmap_gen.h | 2 | ||||
-rw-r--r-- | net/netfilter/ipset/ip_set_core.c | 33 | ||||
-rw-r--r-- | net/netfilter/ipset/ip_set_hash_gen.h | 2 | ||||
-rw-r--r-- | net/netfilter/ipset/ip_set_list_set.c | 2 | ||||
-rw-r--r-- | net/netfilter/nfnetlink_queue.c | 7 | ||||
-rw-r--r-- | net/openvswitch/Kconfig | 4 | ||||
-rw-r--r-- | net/openvswitch/conntrack.c | 21 |
13 files changed, 166 insertions, 122 deletions
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 67b2e27999aa..8570bc7744c2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1521,6 +1521,8 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; @@ -2332,6 +2334,8 @@ static int compat_do_ebt_get_ctl(struct sock *sk, int cmd, if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; + tmp.name[sizeof(tmp.name) - 1] = '\0'; + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) return ret; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index adc8d7221dbb..77f7e7a9ebe1 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -40,7 +40,8 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ -static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, +static void nft_reject_br_send_v4_tcp_reset(struct net *net, + struct sk_buff *oldskb, const struct net_device *dev, int hook) { @@ -48,7 +49,6 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; - struct net *net = sock_net(oldskb->sk); if (!nft_bridge_iphdr_validate(oldskb)) return; @@ -75,7 +75,8 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, br_deliver(br_port_get_rcu(dev), nskb); } -static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, +static void nft_reject_br_send_v4_unreach(struct net *net, + struct sk_buff *oldskb, const struct net_device *dev, int hook, u8 code) { @@ -86,7 +87,6 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, void *payload; __wsum csum; u8 proto; - struct net *net = sock_net(oldskb->sk); if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; @@ -273,17 +273,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, - pkt->hook); + nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb, + pkt->in, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->hook, + nft_reject_br_send_v4_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, nft_reject_icmp_code(priv->icmp_code)); break; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index bf081927e06b..4133b0f513af 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -359,11 +359,12 @@ unsigned int arpt_do_table(struct sk_buff *skb, } /* All zeroes == unconditional rule. */ -static inline bool unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_entry *e) { static const struct arpt_arp uncond; - return memcmp(arp, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct arpt_entry) && + memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if @@ -402,11 +403,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct arpt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->arp)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -474,14 +474,12 @@ next: return 1; } -static inline int check_entry(const struct arpt_entry *e, const char *name) +static inline int check_entry(const struct arpt_entry *e) { const struct xt_entry_target *t; - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); + if (!arp_checkentry(&e->arp)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) return -EINVAL; @@ -522,10 +520,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) struct xt_target *target; int ret; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -557,7 +551,7 @@ static bool check_underflow(const struct arpt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->arp)) + if (!unconditional(e)) return false; t = arpt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -576,9 +570,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -590,6 +586,10 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -598,9 +598,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -969,6 +969,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, sizeof(struct arpt_get_entries) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1233,7 +1234,8 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1246,7 +1248,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct arpt_entry *)e, name); + ret = check_entry((struct arpt_entry *)e); if (ret) return ret; @@ -1662,6 +1664,7 @@ static int compat_get_entries(struct net *net, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e53f8d6f326d..631c100a1338 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ipt_ip *ip) +static inline bool unconditional(const struct ipt_entry *e) { static const struct ipt_ip uncond; - return memcmp(ip, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ipt_entry) && + memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; #undef FWINV } @@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ipt_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ip)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP_TRACE_COMMENT_POLICY] @@ -476,11 +476,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ipt_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && unconditional(&e->ip)) || - visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -569,14 +568,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ipt_entry *e, const char *name) +check_entry(const struct ipt_entry *e) { const struct xt_entry_target *t; - if (!ip_checkentry(&e->ip)) { - duprintf("ip check failed %p %s.\n", e, name); + if (!ip_checkentry(&e->ip)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -666,10 +663,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -721,7 +714,7 @@ static bool check_underflow(const struct ipt_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ip)) + if (!unconditional(e)) return false; t = ipt_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -741,9 +734,11 @@ check_entry_size_and_hooks(struct ipt_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -755,6 +750,10 @@ check_entry_size_and_hooks(struct ipt_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -763,9 +762,9 @@ check_entry_size_and_hooks(struct ipt_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -1157,6 +1156,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1493,7 +1493,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1506,7 +1507,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ipt_entry *)e, name); + ret = check_entry((struct ipt_entry *)e); if (ret) return ret; @@ -1935,6 +1936,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 7b8fbb352877..db5b87509446 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -18,10 +18,10 @@ #include <net/netfilter/nf_conntrack_synproxy.h> static struct iphdr * -synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) +synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, + __be32 daddr) { struct iphdr *iph; - struct net *net = sock_net(skb->sk); skb_reset_network_header(skb); iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); @@ -40,14 +40,12 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) } static void -synproxy_send_tcp(const struct synproxy_net *snet, +synproxy_send_tcp(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net(snet->tmpl); - nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; @@ -72,7 +70,7 @@ free_nskb: } static void -synproxy_send_client_synack(const struct synproxy_net *snet, +synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -91,7 +89,7 @@ synproxy_send_client_synack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -109,15 +107,16 @@ synproxy_send_client_synack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_syn(const struct synproxy_net *snet, +synproxy_send_server_syn(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; @@ -132,7 +131,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -153,12 +152,12 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void -synproxy_send_server_ack(const struct synproxy_net *snet, +synproxy_send_server_ack(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) @@ -177,7 +176,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -193,11 +192,11 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void -synproxy_send_client_ack(const struct synproxy_net *snet, +synproxy_send_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { @@ -215,7 +214,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, return; skb_reserve(nskb, MAX_TCP_HEADER); - niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); @@ -231,15 +230,16 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(net, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } static bool -synproxy_recv_client_ack(const struct synproxy_net *snet, +synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { + struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); @@ -255,7 +255,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet, if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); - synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + synproxy_send_server_syn(net, skb, th, opts, recv_seq); return true; } @@ -263,7 +263,8 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(par->net); + struct net *net = par->net; + struct synproxy_net *snet = synproxy_pernet(net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -292,12 +293,12 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(snet, skb, th, &opts); + synproxy_send_client_synack(net, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { /* ACK from client */ - synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq)); return NF_DROP; } @@ -308,7 +309,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(nhs->net); + struct net *net = nhs->net; + struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -365,7 +367,7 @@ static unsigned int ipv4_synproxy_hook(void *priv, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ - if (synproxy_recv_client_ack(snet, skb, th, &opts, + if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq) + 1)) this_cpu_inc(snet->stats->cookie_retrans); @@ -391,12 +393,12 @@ static unsigned int ipv4_synproxy_hook(void *priv, XT_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); - synproxy_send_server_ack(snet, state, skb, th, &opts); + synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); swap(opts.tsval, opts.tsecr); - synproxy_send_client_ack(snet, skb, th, &opts); + synproxy_send_client_ack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 84f9baf7aee8..86b67b70b626 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -198,11 +198,12 @@ get_entry(const void *base, unsigned int offset) /* All zeroes == unconditional rule. */ /* Mildly perf critical (only if packet tracing is on) */ -static inline bool unconditional(const struct ip6t_ip6 *ipv6) +static inline bool unconditional(const struct ip6t_entry *e) { static const struct ip6t_ip6 uncond; - return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; + return e->target_offset == sizeof(struct ip6t_entry) && + memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0; } static inline const struct xt_entry_target * @@ -258,11 +259,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, } else if (s == e) { (*rulenum)++; - if (s->target_offset == sizeof(struct ip6t_entry) && + if (unconditional(s) && strcmp(t->target.u.kernel.target->name, XT_STANDARD_TARGET) == 0 && - t->verdict < 0 && - unconditional(&s->ipv6)) { + t->verdict < 0) { /* Tail of chains: STANDARD target (return/policy) */ *comment = *chainname == hookname ? comments[NF_IP6_TRACE_COMMENT_POLICY] @@ -488,11 +488,10 @@ mark_source_chains(const struct xt_table_info *newinfo, e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if ((e->target_offset == sizeof(struct ip6t_entry) && + if ((unconditional(e) && (strcmp(t->target.u.user.name, XT_STANDARD_TARGET) == 0) && - t->verdict < 0 && - unconditional(&e->ipv6)) || visited) { + t->verdict < 0) || visited) { unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, @@ -581,14 +580,12 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net) } static int -check_entry(const struct ip6t_entry *e, const char *name) +check_entry(const struct ip6t_entry *e) { const struct xt_entry_target *t; - if (!ip6_checkentry(&e->ipv6)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); + if (!ip6_checkentry(&e->ipv6)) return -EINVAL; - } if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) @@ -679,10 +676,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - ret = check_entry(e, name); - if (ret) - return ret; - e->counters.pcnt = xt_percpu_counter_alloc(); if (IS_ERR_VALUE(e->counters.pcnt)) return -ENOMEM; @@ -733,7 +726,7 @@ static bool check_underflow(const struct ip6t_entry *e) const struct xt_entry_target *t; unsigned int verdict; - if (!unconditional(&e->ipv6)) + if (!unconditional(e)) return false; t = ip6t_get_target_c(e); if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) @@ -753,9 +746,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e, unsigned int valid_hooks) { unsigned int h; + int err; if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } @@ -767,6 +762,10 @@ check_entry_size_and_hooks(struct ip6t_entry *e, return -EINVAL; } + err = check_entry(e); + if (err) + return err; + /* Check hooks & underflows */ for (h = 0; h < NF_INET_NUMHOOKS; h++) { if (!(valid_hooks & (1 << h))) @@ -775,9 +774,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e, newinfo->hook_entry[h] = hook_entries[h]; if ((unsigned char *)e - base == underflows[h]) { if (!check_underflow(e)) { - pr_err("Underflows must be unconditional and " - "use the STANDARD target with " - "ACCEPT/DROP\n"); + pr_debug("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); return -EINVAL; } newinfo->underflow[h] = underflows[h]; @@ -1169,6 +1168,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR_OR_NULL(t)) { @@ -1505,7 +1505,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, duprintf("check_compat_entry_size_and_hooks %p\n", e); if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 || - (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) { + (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit || + (unsigned char *)e + e->next_offset > limit) { duprintf("Bad offset %p, limit = %p\n", e, limit); return -EINVAL; } @@ -1518,7 +1519,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, } /* For purposes of check_entry casting the compat entry is fine */ - ret = check_entry((struct ip6t_entry *)e, name); + ret = check_entry((struct ip6t_entry *)e); if (ret) return ret; @@ -1944,6 +1945,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, *len, sizeof(get) + get.size); return -EINVAL; } + get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index b0bc475f641e..2e8e7e5fb4a6 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -95,7 +95,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (mtype_do_head(skb, map) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 7e6568cad494..a748b0c2c981 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -497,6 +497,26 @@ __ip_set_put(struct ip_set *set) write_unlock_bh(&ip_set_ref_lock); } +/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need + * a separate reference counter + */ +static inline void +__ip_set_get_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + set->ref_netlink++; + write_unlock_bh(&ip_set_ref_lock); +} + +static inline void +__ip_set_put_netlink(struct ip_set *set) +{ + write_lock_bh(&ip_set_ref_lock); + BUG_ON(set->ref_netlink == 0); + set->ref_netlink--; + write_unlock_bh(&ip_set_ref_lock); +} + /* Add, del and test set entries from kernel. * * The set behind the index must exist and must be referenced @@ -1002,7 +1022,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); - if (s && s->ref) { + if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1024,7 +1044,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl, if (!s) { ret = -ENOENT; goto out; - } else if (s->ref) { + } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } @@ -1171,6 +1191,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; + if (from->ref_netlink || to->ref_netlink) + return -EBUSY; + strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); @@ -1206,7 +1229,7 @@ ip_set_dump_done(struct netlink_callback *cb) if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); } return 0; } @@ -1328,7 +1351,7 @@ dump_last: if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); - set->ref++; + set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, @@ -1396,7 +1419,7 @@ release_refcount: if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); - __ip_set_put_byindex(inst, index); + __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index e5336ab36d67..d32fd6b036bf 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1082,7 +1082,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb) if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif - if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 24c6c1962aea..a2a89e4e0a14 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -458,7 +458,7 @@ list_set_head(struct ip_set *set, struct sk_buff *skb) if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || - nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || + nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(sizeof(*map) + n * set->dsize))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 75429997ed41..cb5b630a645b 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -582,7 +582,12 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, /* nfnetlink_unicast will either free the nskb or add it to a socket */ err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { - queue->queue_user_dropped++; + if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { + failopen = 1; + err = 0; + } else { + queue->queue_user_dropped++; + } goto err_out_unlock; } diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 234a73344c6e..ce947292ae77 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -7,7 +7,9 @@ config OPENVSWITCH depends on INET depends on !NF_CONNTRACK || \ (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ - (!NF_NAT || NF_NAT))) + (!NF_NAT || NF_NAT) && \ + (!NF_NAT_IPV4 || NF_NAT_IPV4) && \ + (!NF_NAT_IPV6 || NF_NAT_IPV6))) select LIBCRC32C select MPLS select NET_MPLS_GSO diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index f8a8d4390a8a..1b9d286756be 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -535,14 +535,15 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED_REPLY: - if (skb->protocol == htons(ETH_P_IP) && + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, hooknum)) err = NF_DROP; goto push; -#if IS_ENABLED(CONFIG_NF_NAT_IPV6) - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + skb->protocol == htons(ETH_P_IPV6)) { __be16 frag_off; u8 nexthdr = ipv6_hdr(skb)->nexthdr; int hdrlen = ipv6_skip_exthdr(skb, @@ -557,7 +558,6 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, err = NF_DROP; goto push; } -#endif } /* Non-ICMP, fall thru to initialize if needed. */ case IP_CT_NEW: @@ -664,11 +664,12 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, /* Determine NAT type. * Check if the NAT type can be deduced from the tracked connection. - * Make sure expected traffic is NATted only when committing. + * Make sure new expected connections (IP_CT_RELATED) are NATted only + * when committing. */ if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && ct->status & IPS_NAT_MASK && - (!(ct->status & IPS_EXPECTED_BIT) || info->commit)) { + (ctinfo != IP_CT_RELATED || info->commit)) { /* NAT an established or related connection like before. */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) /* This is the REPLY direction for a connection @@ -1239,7 +1240,8 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, } if (info->range.flags & NF_NAT_RANGE_MAP_IPS) { - if (info->family == NFPROTO_IPV4) { + if (IS_ENABLED(CONFIG_NF_NAT_IPV4) && + info->family == NFPROTO_IPV4) { if (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MIN, info->range.min_addr.ip) || (info->range.max_addr.ip @@ -1247,8 +1249,8 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in_addr(skb, OVS_NAT_ATTR_IP_MAX, info->range.max_addr.ip)))) return false; -#if IS_ENABLED(CONFIG_NF_NAT_IPV6) - } else if (info->family == NFPROTO_IPV6) { + } else if (IS_ENABLED(CONFIG_NF_NAT_IPV6) && + info->family == NFPROTO_IPV6) { if (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MIN, &info->range.min_addr.in6) || (memcmp(&info->range.max_addr.in6, @@ -1257,7 +1259,6 @@ static bool ovs_ct_nat_to_attr(const struct ovs_conntrack_info *info, (nla_put_in6_addr(skb, OVS_NAT_ATTR_IP_MAX, &info->range.max_addr.in6)))) return false; -#endif } else { return false; } |